diff options
Diffstat (limited to 'tvix/nix-compat/src/nar')
-rw-r--r-- | tvix/nix-compat/src/nar/mod.rs | 2 | ||||
-rw-r--r-- | tvix/nix-compat/src/nar/reader/async/mod.rs | 173 | ||||
-rw-r--r-- | tvix/nix-compat/src/nar/reader/async/read.rs | 69 | ||||
-rw-r--r-- | tvix/nix-compat/src/nar/reader/async/test.rs | 310 | ||||
-rw-r--r-- | tvix/nix-compat/src/nar/reader/mod.rs | 162 | ||||
-rw-r--r-- | tvix/nix-compat/src/nar/reader/read.rs | 32 | ||||
-rw-r--r-- | tvix/nix-compat/src/nar/reader/test.rs | 12 | ||||
-rw-r--r-- | tvix/nix-compat/src/nar/wire/mod.rs | 17 | ||||
-rw-r--r-- | tvix/nix-compat/src/nar/wire/tag.rs | 1 |
9 files changed, 690 insertions, 88 deletions
diff --git a/tvix/nix-compat/src/nar/mod.rs b/tvix/nix-compat/src/nar/mod.rs index 058977f4fc..c678d26ffb 100644 --- a/tvix/nix-compat/src/nar/mod.rs +++ b/tvix/nix-compat/src/nar/mod.rs @@ -1,4 +1,4 @@ -mod wire; +pub(crate) mod wire; pub mod reader; pub mod writer; diff --git a/tvix/nix-compat/src/nar/reader/async/mod.rs b/tvix/nix-compat/src/nar/reader/async/mod.rs new file mode 100644 index 0000000000..0808fba38c --- /dev/null +++ b/tvix/nix-compat/src/nar/reader/async/mod.rs @@ -0,0 +1,173 @@ +use std::{ + mem::MaybeUninit, + pin::Pin, + task::{self, Poll}, +}; + +use tokio::io::{self, AsyncBufRead, AsyncRead, ErrorKind::InvalidData}; + +// Required reading for understanding this module. +use crate::{ + nar::{self, wire::PadPar}, + wire::{self, BytesReader}, +}; + +mod read; +#[cfg(test)] +mod test; + +pub type Reader<'a> = dyn AsyncBufRead + Unpin + Send + 'a; + +/// Start reading a NAR file from `reader`. +pub async fn open<'a, 'r>(reader: &'a mut Reader<'r>) -> io::Result<Node<'a, 'r>> { + read::token(reader, &nar::wire::TOK_NAR).await?; + Node::new(reader).await +} + +pub enum Node<'a, 'r: 'a> { + Symlink { + target: Vec<u8>, + }, + File { + executable: bool, + reader: FileReader<'a, 'r>, + }, + Directory(DirReader<'a, 'r>), +} + +impl<'a, 'r: 'a> Node<'a, 'r> { + /// Start reading a [Node], matching the next [wire::Node]. + /// + /// Reading the terminating [wire::TOK_PAR] is done immediately for [Node::Symlink], + /// but is otherwise left to [DirReader] or [BytesReader]. + async fn new(reader: &'a mut Reader<'r>) -> io::Result<Self> { + Ok(match read::tag(reader).await? { + nar::wire::Node::Sym => { + let target = wire::read_bytes(reader, 1..=nar::wire::MAX_TARGET_LEN).await?; + + if target.contains(&0) { + return Err(InvalidData.into()); + } + + read::token(reader, &nar::wire::TOK_PAR).await?; + + Node::Symlink { target } + } + tag @ (nar::wire::Node::Reg | nar::wire::Node::Exe) => Node::File { + executable: tag == nar::wire::Node::Exe, + reader: FileReader { + inner: BytesReader::new_internal(reader, ..).await?, + }, + }, + nar::wire::Node::Dir => Node::Directory(DirReader::new(reader)), + }) + } +} + +/// File contents, readable through the [AsyncRead] trait. +/// +/// It comes with some caveats: +/// * You must always read the entire file, unless you intend to abandon the entire archive reader. +/// * You must abandon the entire archive reader upon the first error. +/// +/// It's fine to read exactly `reader.len()` bytes without ever seeing an explicit EOF. +pub struct FileReader<'a, 'r> { + inner: BytesReader<&'a mut Reader<'r>, PadPar>, +} + +impl<'a, 'r> FileReader<'a, 'r> { + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + pub fn len(&self) -> u64 { + self.inner.len() + } +} + +impl<'a, 'r> AsyncRead for FileReader<'a, 'r> { + fn poll_read( + self: Pin<&mut Self>, + cx: &mut task::Context, + buf: &mut io::ReadBuf, + ) -> Poll<io::Result<()>> { + Pin::new(&mut self.get_mut().inner).poll_read(cx, buf) + } +} + +impl<'a, 'r> AsyncBufRead for FileReader<'a, 'r> { + fn poll_fill_buf(self: Pin<&mut Self>, cx: &mut task::Context) -> Poll<io::Result<&[u8]>> { + Pin::new(&mut self.get_mut().inner).poll_fill_buf(cx) + } + + fn consume(self: Pin<&mut Self>, amt: usize) { + Pin::new(&mut self.get_mut().inner).consume(amt) + } +} + +/// A directory iterator, yielding a sequence of [Node]s. +/// It must be fully consumed before reading further from the [DirReader] that produced it, if any. +pub struct DirReader<'a, 'r> { + reader: &'a mut Reader<'r>, + /// Previous directory entry name. + /// We have to hang onto this to enforce name monotonicity. + prev_name: Vec<u8>, +} + +pub struct Entry<'a, 'r> { + pub name: &'a [u8], + pub node: Node<'a, 'r>, +} + +impl<'a, 'r> DirReader<'a, 'r> { + fn new(reader: &'a mut Reader<'r>) -> Self { + Self { + reader, + prev_name: vec![], + } + } + + /// Read the next [Entry] from the directory. + /// + /// We explicitly don't implement [Iterator], since treating this as + /// a regular Rust iterator will surely lead you astray. + /// + /// * You must always consume the entire iterator, unless you abandon the entire archive reader. + /// * You must abandon the entire archive reader on the first error. + /// * You must abandon the directory reader upon the first [None]. + /// * Even if you know the amount of elements up front, you must keep reading until you encounter [None]. + pub async fn next(&mut self) -> io::Result<Option<Entry<'_, 'r>>> { + // COME FROM the previous iteration: if we've already read an entry, + // read its terminating TOK_PAR here. + if !self.prev_name.is_empty() { + read::token(self.reader, &nar::wire::TOK_PAR).await?; + } + + if let nar::wire::Entry::None = read::tag(self.reader).await? { + return Ok(None); + } + + let mut name = [MaybeUninit::uninit(); nar::wire::MAX_NAME_LEN + 1]; + let name = + wire::read_bytes_buf(self.reader, &mut name, 1..=nar::wire::MAX_NAME_LEN).await?; + + if name.contains(&0) || name.contains(&b'/') || name == b"." || name == b".." { + return Err(InvalidData.into()); + } + + // Enforce strict monotonicity of directory entry names. + if &self.prev_name[..] >= name { + return Err(InvalidData.into()); + } + + self.prev_name.clear(); + self.prev_name.extend_from_slice(name); + + read::token(self.reader, &nar::wire::TOK_NOD).await?; + + Ok(Some(Entry { + name: &self.prev_name, + node: Node::new(self.reader).await?, + })) + } +} diff --git a/tvix/nix-compat/src/nar/reader/async/read.rs b/tvix/nix-compat/src/nar/reader/async/read.rs new file mode 100644 index 0000000000..2adf894922 --- /dev/null +++ b/tvix/nix-compat/src/nar/reader/async/read.rs @@ -0,0 +1,69 @@ +use tokio::io::{ + self, AsyncReadExt, + ErrorKind::{InvalidData, UnexpectedEof}, +}; + +use crate::nar::wire::Tag; + +use super::Reader; + +/// Consume a known token from the reader. +pub async fn token<const N: usize>(reader: &mut Reader<'_>, token: &[u8; N]) -> io::Result<()> { + let mut buf = [0u8; N]; + + // This implements something similar to [AsyncReadExt::read_exact], but verifies that + // the input data matches the token while we read it. These two slices respectively + // represent the remaining token to be verified, and the remaining input buffer. + let mut token = &token[..]; + let mut buf = &mut buf[..]; + + while !token.is_empty() { + match reader.read(buf).await? { + 0 => { + return Err(UnexpectedEof.into()); + } + n => { + let (t, b); + (t, token) = token.split_at(n); + (b, buf) = buf.split_at_mut(n); + + if t != b { + return Err(InvalidData.into()); + } + } + } + } + + Ok(()) +} + +/// Consume a [Tag] from the reader. +pub async fn tag<T: Tag>(reader: &mut Reader<'_>) -> io::Result<T> { + let mut buf = T::make_buf(); + let buf = buf.as_mut(); + + // first read the known minimum length… + reader.read_exact(&mut buf[..T::MIN]).await?; + + // then decide which tag we're expecting + let tag = T::from_u8(buf[T::OFF]).ok_or(InvalidData)?; + let (head, tail) = tag.as_bytes().split_at(T::MIN); + + // make sure what we've read so far is valid + if buf[..T::MIN] != *head { + return Err(InvalidData.into()); + } + + // …then read the rest, if any + if !tail.is_empty() { + let rest = tail.len(); + reader.read_exact(&mut buf[..rest]).await?; + + // and make sure it's what we expect + if buf[..rest] != *tail { + return Err(InvalidData.into()); + } + } + + Ok(tag) +} diff --git a/tvix/nix-compat/src/nar/reader/async/test.rs b/tvix/nix-compat/src/nar/reader/async/test.rs new file mode 100644 index 0000000000..7bc1f8942f --- /dev/null +++ b/tvix/nix-compat/src/nar/reader/async/test.rs @@ -0,0 +1,310 @@ +use tokio::io::AsyncReadExt; + +mod nar { + pub use crate::nar::reader::r#async as reader; +} + +#[tokio::test] +async fn symlink() { + let mut f = std::io::Cursor::new(include_bytes!("../../tests/symlink.nar")); + let node = nar::reader::open(&mut f).await.unwrap(); + + match node { + nar::reader::Node::Symlink { target } => { + assert_eq!( + &b"/nix/store/somewhereelse"[..], + &target, + "target must match" + ); + } + _ => panic!("unexpected type"), + } +} + +#[tokio::test] +async fn file() { + let mut f = std::io::Cursor::new(include_bytes!("../../tests/helloworld.nar")); + let node = nar::reader::open(&mut f).await.unwrap(); + + match node { + nar::reader::Node::File { + executable, + mut reader, + } => { + assert!(!executable); + let mut buf = vec![]; + reader + .read_to_end(&mut buf) + .await + .expect("read must succeed"); + assert_eq!(&b"Hello World!"[..], &buf); + } + _ => panic!("unexpected type"), + } +} + +#[tokio::test] +async fn complicated() { + let mut f = std::io::Cursor::new(include_bytes!("../../tests/complicated.nar")); + let node = nar::reader::open(&mut f).await.unwrap(); + + match node { + nar::reader::Node::Directory(mut dir_reader) => { + // first entry is .keep, an empty regular file. + must_read_file( + ".keep", + dir_reader + .next() + .await + .expect("next must succeed") + .expect("must be some"), + ) + .await; + + // second entry is aa, a symlink to /nix/store/somewhereelse + must_be_symlink( + "aa", + "/nix/store/somewhereelse", + dir_reader + .next() + .await + .expect("next must be some") + .expect("must be some"), + ); + + { + // third entry is a directory called "keep" + let entry = dir_reader + .next() + .await + .expect("next must be some") + .expect("must be some"); + + assert_eq!(b"keep", entry.name); + + match entry.node { + nar::reader::Node::Directory(mut subdir_reader) => { + { + // first entry is .keep, an empty regular file. + let entry = subdir_reader + .next() + .await + .expect("next must succeed") + .expect("must be some"); + + must_read_file(".keep", entry).await; + } + + // we must read the None + assert!( + subdir_reader + .next() + .await + .expect("next must succeed") + .is_none(), + "keep directory contains only .keep" + ); + } + _ => panic!("unexpected type for keep/.keep"), + } + }; + + // reading more entries yields None (and we actually must read until this) + assert!(dir_reader.next().await.expect("must succeed").is_none()); + } + _ => panic!("unexpected type"), + } +} + +#[tokio::test] +#[should_panic] +#[ignore = "TODO: async poisoning"] +async fn file_read_abandoned() { + let mut f = std::io::Cursor::new(include_bytes!("../../tests/complicated.nar")); + let node = nar::reader::open(&mut f).await.unwrap(); + + match node { + nar::reader::Node::Directory(mut dir_reader) => { + // first entry is .keep, an empty regular file. + { + let entry = dir_reader + .next() + .await + .expect("next must succeed") + .expect("must be some"); + + assert_eq!(b".keep", entry.name); + // don't bother to finish reading it. + }; + + // this should panic (not return an error), because we are meant to abandon the archive reader now. + assert!(dir_reader.next().await.expect("must succeed").is_none()); + } + _ => panic!("unexpected type"), + } +} + +#[tokio::test] +#[should_panic] +#[ignore = "TODO: async poisoning"] +async fn dir_read_abandoned() { + let mut f = std::io::Cursor::new(include_bytes!("../../tests/complicated.nar")); + let node = nar::reader::open(&mut f).await.unwrap(); + + match node { + nar::reader::Node::Directory(mut dir_reader) => { + // first entry is .keep, an empty regular file. + must_read_file( + ".keep", + dir_reader + .next() + .await + .expect("next must succeed") + .expect("must be some"), + ) + .await; + + // second entry is aa, a symlink to /nix/store/somewhereelse + must_be_symlink( + "aa", + "/nix/store/somewhereelse", + dir_reader + .next() + .await + .expect("next must be some") + .expect("must be some"), + ); + + { + // third entry is a directory called "keep" + let entry = dir_reader + .next() + .await + .expect("next must be some") + .expect("must be some"); + + assert_eq!(b"keep", entry.name); + + match entry.node { + nar::reader::Node::Directory(_) => { + // don't finish using it, which poisons the archive reader + } + _ => panic!("unexpected type for keep/.keep"), + } + }; + + // this should panic, because we didn't finish reading the child subdirectory + assert!(dir_reader.next().await.expect("must succeed").is_none()); + } + _ => panic!("unexpected type"), + } +} + +#[tokio::test] +#[should_panic] +#[ignore = "TODO: async poisoning"] +async fn dir_read_after_none() { + let mut f = std::io::Cursor::new(include_bytes!("../../tests/complicated.nar")); + let node = nar::reader::open(&mut f).await.unwrap(); + + match node { + nar::reader::Node::Directory(mut dir_reader) => { + // first entry is .keep, an empty regular file. + must_read_file( + ".keep", + dir_reader + .next() + .await + .expect("next must succeed") + .expect("must be some"), + ) + .await; + + // second entry is aa, a symlink to /nix/store/somewhereelse + must_be_symlink( + "aa", + "/nix/store/somewhereelse", + dir_reader + .next() + .await + .expect("next must be some") + .expect("must be some"), + ); + + { + // third entry is a directory called "keep" + let entry = dir_reader + .next() + .await + .expect("next must be some") + .expect("must be some"); + + assert_eq!(b"keep", entry.name); + + match entry.node { + nar::reader::Node::Directory(mut subdir_reader) => { + // first entry is .keep, an empty regular file. + must_read_file( + ".keep", + subdir_reader + .next() + .await + .expect("next must succeed") + .expect("must be some"), + ) + .await; + + // we must read the None + assert!( + subdir_reader + .next() + .await + .expect("next must succeed") + .is_none(), + "keep directory contains only .keep" + ); + } + _ => panic!("unexpected type for keep/.keep"), + } + }; + + // reading more entries yields None (and we actually must read until this) + assert!(dir_reader.next().await.expect("must succeed").is_none()); + + // this should panic, because we already got a none so we're meant to stop. + dir_reader.next().await.unwrap(); + unreachable!() + } + _ => panic!("unexpected type"), + } +} + +async fn must_read_file(name: &'static str, entry: nar::reader::Entry<'_, '_>) { + assert_eq!(name.as_bytes(), entry.name); + + match entry.node { + nar::reader::Node::File { + executable, + mut reader, + } => { + assert!(!executable); + assert_eq!(reader.read(&mut [0]).await.unwrap(), 0); + } + _ => panic!("unexpected type for {}", name), + } +} + +fn must_be_symlink( + name: &'static str, + exp_target: &'static str, + entry: nar::reader::Entry<'_, '_>, +) { + assert_eq!(name.as_bytes(), entry.name); + + match entry.node { + nar::reader::Node::Symlink { target } => { + assert_eq!(exp_target.as_bytes(), &target); + } + _ => panic!("unexpected type for {}", name), + } +} diff --git a/tvix/nix-compat/src/nar/reader/mod.rs b/tvix/nix-compat/src/nar/reader/mod.rs index 75463a6450..7e9143c8f3 100644 --- a/tvix/nix-compat/src/nar/reader/mod.rs +++ b/tvix/nix-compat/src/nar/reader/mod.rs @@ -10,9 +10,15 @@ use std::io::{ Read, Write, }; +#[cfg(not(debug_assertions))] +use std::marker::PhantomData; + // Required reading for understanding this module. use crate::nar::wire; +#[cfg(all(feature = "async", feature = "wire"))] +pub mod r#async; + mod read; #[cfg(test)] mod test; @@ -27,25 +33,15 @@ struct ArchiveReader<'a, 'r> { /// * An error is encountered at any point /// * A file or directory reader is dropped before being read entirely. /// All of these checks vanish in release mode. - #[cfg(debug_assertions)] status: ArchiveReaderStatus<'a>, } -macro_rules! poison { - ($it:expr) => { - #[cfg(debug_assertions)] - { - $it.status.poison(); - } - }; -} - macro_rules! try_or_poison { ($it:expr, $ex:expr) => { match $ex { Ok(x) => x, Err(e) => { - poison!($it); + $it.status.poison(); return Err(e.into()); } } @@ -56,11 +52,7 @@ pub fn open<'a, 'r>(reader: &'a mut Reader<'r>) -> io::Result<Node<'a, 'r>> { read::token(reader, &wire::TOK_NAR)?; Node::new(ArchiveReader { inner: reader, - #[cfg(debug_assertions)] - status: ArchiveReaderStatus::StackTop { - poisoned: false, - ready: true, - }, + status: ArchiveReaderStatus::top(), }) } @@ -80,7 +72,6 @@ impl<'a, 'r> Node<'a, 'r> { /// /// Reading the terminating [wire::TOK_PAR] is done immediately for [Node::Symlink], /// but is otherwise left to [DirReader] or [FileReader]. - #[allow(unused_mut)] // due to debug_assertions code fn new(mut reader: ArchiveReader<'a, 'r>) -> io::Result<Self> { Ok(match read::tag(reader.inner)? { wire::Node::Sym => { @@ -88,15 +79,12 @@ impl<'a, 'r> Node<'a, 'r> { try_or_poison!(reader, read::bytes(reader.inner, wire::MAX_TARGET_LEN)); if target.is_empty() || target.contains(&0) { - poison!(reader); + reader.status.poison(); return Err(InvalidData.into()); } try_or_poison!(reader, read::token(reader.inner, &wire::TOK_PAR)); - #[cfg(debug_assertions)] - { - reader.status.ready_parent(); // Immediately allow reading from parent again - } + reader.status.ready_parent(); // Immediately allow reading from parent again Node::Symlink { target } } @@ -131,17 +119,13 @@ pub struct FileReader<'a, 'r> { impl<'a, 'r> FileReader<'a, 'r> { /// Instantiate a new reader, starting after [wire::TOK_REG] or [wire::TOK_EXE]. /// We handle the terminating [wire::TOK_PAR] on semantic EOF. - #[allow(unused_mut)] // due to debug_assertions code fn new(mut reader: ArchiveReader<'a, 'r>, len: u64) -> io::Result<Self> { // For zero-length files, we have to read the terminating TOK_PAR // immediately, since FileReader::read may never be called; we've // already reached semantic EOF by definition. if len == 0 { read::token(reader.inner, &wire::TOK_PAR)?; - #[cfg(debug_assertions)] - { - reader.status.ready_parent(); - } + reader.status.ready_parent(); } Ok(Self { @@ -175,7 +159,7 @@ impl FileReader<'_, '_> { let mut buf = try_or_poison!(self.reader, self.reader.inner.fill_buf()); if buf.is_empty() { - poison!(self.reader); + self.reader.status.poison(); return Err(UnexpectedEof.into()); } @@ -237,7 +221,7 @@ impl Read for FileReader<'_, '_> { self.len -= n as u64; if n == 0 { - poison!(self.reader); + self.reader.status.poison(); return Err(UnexpectedEof.into()); } @@ -260,18 +244,15 @@ impl FileReader<'_, '_> { try_or_poison!(self.reader, self.reader.inner.read_exact(&mut buf[pad..])); if buf != [0; 8] { - poison!(self.reader); + self.reader.status.poison(); return Err(InvalidData.into()); } } try_or_poison!(self.reader, read::token(self.reader.inner, &wire::TOK_PAR)); - #[cfg(debug_assertions)] - { - // Done with reading this file, allow going back up the chain of readers - self.reader.status.ready_parent(); - } + // Done with reading this file, allow going back up the chain of readers + self.reader.status.ready_parent(); Ok(()) } @@ -283,11 +264,11 @@ pub struct DirReader<'a, 'r> { reader: ArchiveReader<'a, 'r>, /// Previous directory entry name. /// We have to hang onto this to enforce name monotonicity. - prev_name: Option<Vec<u8>>, + prev_name: Vec<u8>, } pub struct Entry<'a, 'r> { - pub name: Vec<u8>, + pub name: &'a [u8], pub node: Node<'a, 'r>, } @@ -295,7 +276,7 @@ impl<'a, 'r> DirReader<'a, 'r> { fn new(reader: ArchiveReader<'a, 'r>) -> Self { Self { reader, - prev_name: None, + prev_name: vec![], } } @@ -314,23 +295,21 @@ impl<'a, 'r> DirReader<'a, 'r> { // COME FROM the previous iteration: if we've already read an entry, // read its terminating TOK_PAR here. - if self.prev_name.is_some() { + if !self.prev_name.is_empty() { try_or_poison!(self.reader, read::token(self.reader.inner, &wire::TOK_PAR)); } // Determine if there are more entries to follow if let wire::Entry::None = try_or_poison!(self.reader, read::tag(self.reader.inner)) { // We've reached the end of this directory. - #[cfg(debug_assertions)] - { - self.reader.status.ready_parent(); - } + self.reader.status.ready_parent(); return Ok(None); } + let mut name = [0; wire::MAX_NAME_LEN + 1]; let name = try_or_poison!( self.reader, - read::bytes(self.reader.inner, wire::MAX_NAME_LEN) + read::bytes_buf(self.reader.inner, &mut name, wire::MAX_NAME_LEN) ); if name.is_empty() @@ -339,29 +318,23 @@ impl<'a, 'r> DirReader<'a, 'r> { || name == b"." || name == b".." { - poison!(self.reader); + self.reader.status.poison(); return Err(InvalidData.into()); } // Enforce strict monotonicity of directory entry names. - match &mut self.prev_name { - None => { - self.prev_name = Some(name.clone()); - } - Some(prev_name) => { - if *prev_name >= name { - poison!(self.reader); - return Err(InvalidData.into()); - } - - name[..].clone_into(prev_name); - } + if &self.prev_name[..] >= name { + self.reader.status.poison(); + return Err(InvalidData.into()); } + self.prev_name.clear(); + self.prev_name.extend_from_slice(name); + try_or_poison!(self.reader, read::token(self.reader.inner, &wire::TOK_NOD)); Ok(Some(Entry { - name, + name: &self.prev_name, // Don't need to worry about poisoning here: Node::new will do it for us if needed node: Node::new(self.reader.child())?, })) @@ -373,12 +346,12 @@ impl<'a, 'r> DirReader<'a, 'r> { /// so we can check they are abandoned when an error occurs /// * Make sure only the most recently created object is read from, and is fully exhausted /// before anything it was created from is used again. -#[cfg(debug_assertions)] enum ArchiveReaderStatus<'a> { - StackTop { - poisoned: bool, - ready: bool, - }, + #[cfg(not(debug_assertions))] + None(PhantomData<&'a ()>), + #[cfg(debug_assertions)] + StackTop { poisoned: bool, ready: bool }, + #[cfg(debug_assertions)] StackChild { poisoned: &'a mut bool, parent_ready: &'a mut bool, @@ -386,12 +359,28 @@ enum ArchiveReaderStatus<'a> { }, } -#[cfg(debug_assertions)] impl ArchiveReaderStatus<'_> { + fn top() -> Self { + #[cfg(debug_assertions)] + { + ArchiveReaderStatus::StackTop { + poisoned: false, + ready: true, + } + } + + #[cfg(not(debug_assertions))] + ArchiveReaderStatus::None(PhantomData) + } + /// Poison all the objects sharing the same reader, to be used when an error occurs fn poison(&mut self) { match self { + #[cfg(not(debug_assertions))] + ArchiveReaderStatus::None(_) => {} + #[cfg(debug_assertions)] ArchiveReaderStatus::StackTop { poisoned: x, .. } => *x = true, + #[cfg(debug_assertions)] ArchiveReaderStatus::StackChild { poisoned: x, .. } => **x = true, } } @@ -399,10 +388,14 @@ impl ArchiveReaderStatus<'_> { /// Mark the parent as ready, allowing it to be used again and preventing this reference to the reader being used again. fn ready_parent(&mut self) { match self { - Self::StackTop { ready, .. } => { + #[cfg(not(debug_assertions))] + ArchiveReaderStatus::None(_) => {} + #[cfg(debug_assertions)] + ArchiveReaderStatus::StackTop { ready, .. } => { *ready = false; } - Self::StackChild { + #[cfg(debug_assertions)] + ArchiveReaderStatus::StackChild { ready, parent_ready, .. @@ -415,15 +408,23 @@ impl ArchiveReaderStatus<'_> { fn poisoned(&self) -> bool { match self { - Self::StackTop { poisoned, .. } => *poisoned, - Self::StackChild { poisoned, .. } => **poisoned, + #[cfg(not(debug_assertions))] + ArchiveReaderStatus::None(_) => false, + #[cfg(debug_assertions)] + ArchiveReaderStatus::StackTop { poisoned, .. } => *poisoned, + #[cfg(debug_assertions)] + ArchiveReaderStatus::StackChild { poisoned, .. } => **poisoned, } } fn ready(&self) -> bool { match self { - Self::StackTop { ready, .. } => *ready, - Self::StackChild { ready, .. } => *ready, + #[cfg(not(debug_assertions))] + ArchiveReaderStatus::None(_) => true, + #[cfg(debug_assertions)] + ArchiveReaderStatus::StackTop { ready, .. } => *ready, + #[cfg(debug_assertions)] + ArchiveReaderStatus::StackChild { ready, .. } => *ready, } } } @@ -434,6 +435,8 @@ impl<'a, 'r> ArchiveReader<'a, 'r> { fn child(&mut self) -> ArchiveReader<'_, 'r> { ArchiveReader { inner: self.inner, + #[cfg(not(debug_assertions))] + status: ArchiveReaderStatus::None(PhantomData), #[cfg(debug_assertions)] status: match &mut self.status { ArchiveReaderStatus::StackTop { poisoned, ready } => { @@ -462,16 +465,13 @@ impl<'a, 'r> ArchiveReader<'a, 'r> { /// Only does anything when debug assertions are on. #[inline(always)] fn check_correct(&self) { - #[cfg(debug_assertions)] - { - debug_assert!( - !self.status.poisoned(), - "Archive reader used after it was meant to be abandoned!" - ); - debug_assert!( - self.status.ready(), - "Non-ready archive reader used! (Should've been reading from something else)" - ) - } + assert!( + !self.status.poisoned(), + "Archive reader used after it was meant to be abandoned!" + ); + assert!( + self.status.ready(), + "Non-ready archive reader used! (Should've been reading from something else)" + ); } } diff --git a/tvix/nix-compat/src/nar/reader/read.rs b/tvix/nix-compat/src/nar/reader/read.rs index 1ce1613764..9938581f2a 100644 --- a/tvix/nix-compat/src/nar/reader/read.rs +++ b/tvix/nix-compat/src/nar/reader/read.rs @@ -15,6 +15,38 @@ pub fn u64(reader: &mut Reader) -> io::Result<u64> { Ok(u64::from_le_bytes(buf)) } +/// Consume a byte string from the reader into a provided buffer, +/// returning the data bytes. +pub fn bytes_buf<'a, const N: usize>( + reader: &mut Reader, + buf: &'a mut [u8; N], + max_len: usize, +) -> io::Result<&'a [u8]> { + assert_eq!(N % 8, 0); + assert!(max_len <= N); + + // read the length, and reject excessively large values + let len = self::u64(reader)?; + if len > max_len as u64 { + return Err(InvalidData.into()); + } + // we know the length fits in a usize now + let len = len as usize; + + // read the data and padding into a buffer + let buf_len = (len + 7) & !7; + reader.read_exact(&mut buf[..buf_len])?; + + // verify that the padding is all zeroes + for &b in &buf[len..buf_len] { + if b != 0 { + return Err(InvalidData.into()); + } + } + + Ok(&buf[..len]) +} + /// Consume a byte string of up to `max_len` bytes from the reader. pub fn bytes(reader: &mut Reader, max_len: usize) -> io::Result<Vec<u8>> { assert!(max_len <= isize::MAX as usize); diff --git a/tvix/nix-compat/src/nar/reader/test.rs b/tvix/nix-compat/src/nar/reader/test.rs index 02dc4767c9..63e4fb289f 100644 --- a/tvix/nix-compat/src/nar/reader/test.rs +++ b/tvix/nix-compat/src/nar/reader/test.rs @@ -71,7 +71,7 @@ fn complicated() { .expect("next must be some") .expect("must be some"); - assert_eq!(&b"keep"[..], &entry.name); + assert_eq!(b"keep", entry.name); match entry.node { nar::reader::Node::Directory(mut subdir_reader) => { @@ -117,7 +117,7 @@ fn file_read_abandoned() { .expect("next must succeed") .expect("must be some"); - assert_eq!(&b".keep"[..], &entry.name); + assert_eq!(b".keep", entry.name); // don't bother to finish reading it. }; @@ -162,7 +162,7 @@ fn dir_read_abandoned() { .expect("next must be some") .expect("must be some"); - assert_eq!(&b"keep"[..], &entry.name); + assert_eq!(b"keep", entry.name); match entry.node { nar::reader::Node::Directory(_) => { @@ -213,7 +213,7 @@ fn dir_read_after_none() { .expect("next must be some") .expect("must be some"); - assert_eq!(&b"keep"[..], &entry.name); + assert_eq!(b"keep", entry.name); match entry.node { nar::reader::Node::Directory(mut subdir_reader) => { @@ -248,7 +248,7 @@ fn dir_read_after_none() { } fn must_read_file(name: &'static str, entry: nar::reader::Entry<'_, '_>) { - assert_eq!(name.as_bytes(), &entry.name); + assert_eq!(name.as_bytes(), entry.name); match entry.node { nar::reader::Node::File { @@ -267,7 +267,7 @@ fn must_be_symlink( exp_target: &'static str, entry: nar::reader::Entry<'_, '_>, ) { - assert_eq!(name.as_bytes(), &entry.name); + assert_eq!(name.as_bytes(), entry.name); match entry.node { nar::reader::Node::Symlink { target } => { diff --git a/tvix/nix-compat/src/nar/wire/mod.rs b/tvix/nix-compat/src/nar/wire/mod.rs index b9e0212495..9e99b530ce 100644 --- a/tvix/nix-compat/src/nar/wire/mod.rs +++ b/tvix/nix-compat/src/nar/wire/mod.rs @@ -90,6 +90,23 @@ pub const TOK_DIR: [u8; 24] = *b"\x09\0\0\0\0\0\0\0directory\0\0\0\0\0\0\0"; pub const TOK_ENT: [u8; 48] = *b"\x05\0\0\0\0\0\0\0entry\0\0\0\x01\0\0\0\0\0\0\0(\0\0\0\0\0\0\0\x04\0\0\0\0\0\0\0name\0\0\0\0"; pub const TOK_NOD: [u8; 48] = *b"\x04\0\0\0\0\0\0\0node\0\0\0\0\x01\0\0\0\0\0\0\0(\0\0\0\0\0\0\0\x04\0\0\0\0\0\0\0type\0\0\0\0"; pub const TOK_PAR: [u8; 16] = *b"\x01\0\0\0\0\0\0\0)\0\0\0\0\0\0\0"; +#[cfg(feature = "async")] +const TOK_PAD_PAR: [u8; 24] = *b"\0\0\0\0\0\0\0\0\x01\0\0\0\0\0\0\0)\0\0\0\0\0\0\0"; + +#[cfg(feature = "async")] +#[derive(Debug)] +pub(crate) enum PadPar {} + +#[cfg(feature = "async")] +impl crate::wire::reader::Tag for PadPar { + const PATTERN: &'static [u8] = &TOK_PAD_PAR; + + type Buf = [u8; 24]; + + fn make_buf() -> Self::Buf { + [0; 24] + } +} #[test] fn tokens() { diff --git a/tvix/nix-compat/src/nar/wire/tag.rs b/tvix/nix-compat/src/nar/wire/tag.rs index 55b93f9985..4982a0d707 100644 --- a/tvix/nix-compat/src/nar/wire/tag.rs +++ b/tvix/nix-compat/src/nar/wire/tag.rs @@ -10,6 +10,7 @@ pub trait Tag: Sized { const MIN: usize; /// Minimal suitably sized buffer for reading the wire representation + /// /// HACK: This is a workaround for const generics limitations. type Buf: AsMut<[u8]> + Send; |