From 43c851bc841bccc65ffddab7205783c43f25417f Mon Sep 17 00:00:00 2001 From: Florian Klink Date: Thu, 14 Mar 2024 13:50:56 +0200 Subject: refactor(nix-compat/store_path): take [u8;32] for outer fingerprint The outer fingerprint used for store path calculation is always a sha256 digest. This includes both input and output-addressed store paths. We used a NixHash here, which can also represent other hash types, and that had a bunch of annoyances: - Whenever we had the bytes, we had to wrap them in a NixHash::Sha256(). - Things like AtermWriteable had to be implemented on NixHash, even though we then had an assertion it was only called in the NixHash::Sha256 case. Change-Id: Ic895503d9b071800d2e52ae057666f44bd0ab9d6 Reviewed-on: https://cl.tvl.fyi/c/depot/+/11142 Tested-by: BuildkiteCI Autosubmit: flokli Reviewed-by: John Ericson Reviewed-by: picnoir picnoir --- tvix/glue/src/known_paths.rs | 18 ++++++------- tvix/nix-compat/src/derivation/mod.rs | 14 +++++------ tvix/nix-compat/src/derivation/tests/mod.rs | 12 ++++----- tvix/nix-compat/src/derivation/write.rs | 14 ++++------- tvix/nix-compat/src/store_path/utils.rs | 39 ++++++++++++++--------------- 5 files changed, 46 insertions(+), 51 deletions(-) diff --git a/tvix/glue/src/known_paths.rs b/tvix/glue/src/known_paths.rs index 13f86fae0e..9cd9470fa9 100644 --- a/tvix/glue/src/known_paths.rs +++ b/tvix/glue/src/known_paths.rs @@ -8,7 +8,7 @@ //! This data is required to find the derivation needed to actually trigger the //! build, if necessary. -use nix_compat::{derivation::Derivation, nixhash::NixHash, store_path::StorePath}; +use nix_compat::{derivation::Derivation, store_path::StorePath}; use std::collections::HashMap; /// Struct keeping track of all known Derivations in the current evaluation. @@ -20,7 +20,7 @@ pub struct KnownPaths { /// /// Keys are derivation paths, values are a tuple of the "hash derivation /// modulo" and the Derivation struct itself. - derivations: HashMap, + derivations: HashMap, /// A map from output path to (one) drv path. /// Note that in the case of FODs, multiple drvs can produce the same output @@ -30,7 +30,7 @@ pub struct KnownPaths { impl KnownPaths { /// Fetch the opaque "hash derivation modulo" for a given derivation path. - pub fn get_hash_derivation_modulo(&self, drv_path: &StorePath) -> Option<&NixHash> { + pub fn get_hash_derivation_modulo(&self, drv_path: &StorePath) -> Option<&[u8; 32]> { self.derivations .get(drv_path) .map(|(hash_derivation_modulo, _derivation)| hash_derivation_modulo) @@ -83,7 +83,7 @@ impl KnownPaths { #[allow(unused_variables)] // assertions on this only compiled in debug builds let old = self .derivations - .insert(drv_path.to_owned(), (hash_derivation_modulo.clone(), drv)); + .insert(drv_path.to_owned(), (hash_derivation_modulo, drv)); #[cfg(debug_assertions)] { @@ -99,7 +99,7 @@ impl KnownPaths { #[cfg(test)] mod tests { - use nix_compat::{derivation::Derivation, nixhash::NixHash, store_path::StorePath}; + use nix_compat::{derivation::Derivation, store_path::StorePath}; use super::KnownPaths; use hex_literal::hex; @@ -165,9 +165,9 @@ mod tests { // It should be possible to get the hash derivation modulo. assert_eq!( - Some(&NixHash::Sha256(hex!( + Some(&hex!( "c79aebd0ce3269393d4a1fde2cbd1d975d879b40f0bf40a48f550edc107fd5df" - ))), + )), known_paths.get_hash_derivation_modulo(&BAR_DRV_PATH.clone()) ); @@ -180,9 +180,9 @@ mod tests { known_paths.get_drv_by_drvpath(&FOO_DRV_PATH) ); assert_eq!( - Some(&NixHash::Sha256(hex!( + Some(&hex!( "af030d36d63d3d7f56a71adaba26b36f5fa1f9847da5eed953ed62e18192762f" - ))), + )), known_paths.get_hash_derivation_modulo(&FOO_DRV_PATH.clone()) ); diff --git a/tvix/nix-compat/src/derivation/mod.rs b/tvix/nix-compat/src/derivation/mod.rs index 848f1a31ff..9a7dd04c11 100644 --- a/tvix/nix-compat/src/derivation/mod.rs +++ b/tvix/nix-compat/src/derivation/mod.rs @@ -178,8 +178,8 @@ impl Derivation { /// /// This is called `hashDerivationModulo` in nixcpp. /// - /// It returns a [NixHash], created by calculating the sha256 digest of - /// the derivation ATerm representation, except that: + /// It returns the sha256 digest of the derivation ATerm representation, + /// except that: /// - any input derivation paths have beed replaced "by the result of a /// recursive call to this function" and that /// - for fixed-output derivations the special @@ -190,16 +190,16 @@ impl Derivation { /// this function to provide a lookup function to lookup these calculation /// results of parent derivations at `fn_get_derivation_or_fod_hash` (by /// drv path). - pub fn derivation_or_fod_hash(&self, fn_get_derivation_or_fod_hash: F) -> NixHash + pub fn derivation_or_fod_hash(&self, fn_get_derivation_or_fod_hash: F) -> [u8; 32] where - F: Fn(&StorePathRef) -> NixHash, + F: Fn(&StorePathRef) -> [u8; 32], { // Fixed-output derivations return a fixed hash. // Non-Fixed-output derivations return the sha256 digest of the ATerm // notation, but with all input_derivation paths replaced by a recursive // call to this function. // We use fn_get_derivation_or_fod_hash here, so callers can precompute this. - NixHash::Sha256(self.fod_digest().unwrap_or({ + self.fod_digest().unwrap_or({ // For each input_derivation, look up the // derivation_or_fod_hash, and replace the derivation path with // it's HEXLOWER digest. @@ -216,7 +216,7 @@ impl Derivation { hasher.update(self.to_aterm_bytes_with_replacements(&input_derivations)); hasher.finalize().into() - })) + }) } /// This calculates all output paths of a Derivation and updates the struct. @@ -238,7 +238,7 @@ impl Derivation { pub fn calculate_output_paths( &mut self, name: &str, - derivation_or_fod_hash: &NixHash, + derivation_or_fod_hash: &[u8; 32], ) -> Result<(), DerivationError> { // The fingerprint and hash differs per output for (output_name, output) in self.outputs.iter_mut() { diff --git a/tvix/nix-compat/src/derivation/tests/mod.rs b/tvix/nix-compat/src/derivation/tests/mod.rs index 2bf09265bc..56bad7869d 100644 --- a/tvix/nix-compat/src/derivation/tests/mod.rs +++ b/tvix/nix-compat/src/derivation/tests/mod.rs @@ -5,6 +5,7 @@ use crate::derivation::parser::Error; use crate::derivation::Derivation; use crate::store_path::StorePath; use bstr::{BStr, BString}; +use hex_literal::hex; use std::collections::BTreeSet; use std::fs::File; use std::io::Read; @@ -184,16 +185,15 @@ fn derivation_with_trimmed_output_paths(derivation: &Derivation) -> Derivation { } } -#[test_case("0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv", "sha256:724f3e3634fce4cbbbd3483287b8798588e80280660b9a63fd13a1bc90485b33"; "fixed_sha256")] -#[test_case("ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv", "sha256:c79aebd0ce3269393d4a1fde2cbd1d975d879b40f0bf40a48f550edc107fd5df";"fixed-sha1")] -fn derivation_or_fod_hash(drv_path: &str, expected_nix_hash_string: &str) { +#[test_case("0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv", hex!("724f3e3634fce4cbbbd3483287b8798588e80280660b9a63fd13a1bc90485b33"); "fixed_sha256")] +#[test_case("ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv", hex!("c79aebd0ce3269393d4a1fde2cbd1d975d879b40f0bf40a48f550edc107fd5df");"fixed-sha1")] +fn derivation_or_fod_hash(drv_path: &str, expected_digest: [u8; 32]) { // read in the fixture let json_bytes = read_file(&format!("{}/ok/{}.json", RESOURCES_PATHS, drv_path)); let drv: Derivation = serde_json::from_slice(&json_bytes).expect("must deserialize"); let actual = drv.derivation_or_fod_hash(|_| panic!("must not be called")); - - assert_eq!(expected_nix_hash_string, actual.to_nix_hex_string()); + assert_eq!(expected_digest, actual); } /// This reads a Derivation (in A-Term), trims out all fields containing @@ -401,7 +401,7 @@ fn output_path_construction() { if drv_path.to_string() != "0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv" { panic!("lookup called with unexpected drv_path: {}", drv_path); } - bar_drv_derivation_or_fod_hash.clone() + bar_drv_derivation_or_fod_hash }), ); assert!(foo_calc_result.is_ok()); diff --git a/tvix/nix-compat/src/derivation/write.rs b/tvix/nix-compat/src/derivation/write.rs index f20bf4e121..735b781574 100644 --- a/tvix/nix-compat/src/derivation/write.rs +++ b/tvix/nix-compat/src/derivation/write.rs @@ -8,7 +8,8 @@ use crate::derivation::{ca_kind_prefix, output::Output}; use crate::nixbase32; use crate::store_path::{StorePath, StorePathRef, STORE_DIR_WITH_SLASH}; use bstr::BString; -use std::fmt::Display; +use data_encoding::HEXLOWER; + use std::{ collections::{BTreeMap, BTreeSet}, io, @@ -16,8 +17,6 @@ use std::{ io::Write, }; -use super::NixHash; - pub const DERIVATION_PREFIX: &str = "Derive"; pub const PAREN_OPEN: char = '('; pub const PAREN_CLOSE: char = ')'; @@ -31,7 +30,7 @@ pub const QUOTE: char = '"'; /// Note that we mostly use explicit `write_*` calls /// instead since the serialization of the items depends on /// the context a lot. -pub(crate) trait AtermWriteable: Display { +pub(crate) trait AtermWriteable { fn aterm_write(&self, writer: &mut impl Write) -> std::io::Result<()>; fn aterm_bytes(&self) -> Vec { @@ -67,12 +66,9 @@ impl AtermWriteable for String { } } -impl AtermWriteable for NixHash { +impl AtermWriteable for [u8; 32] { fn aterm_write(&self, writer: &mut impl Write) -> std::io::Result<()> { - // When we serialize the placeholder hashes, - // they need to be SHA256. - debug_assert!(matches!(self, NixHash::Sha256(_))); - write_field(writer, self.to_plain_hex_string(), false) + write_field(writer, HEXLOWER.encode(self), false) } } diff --git a/tvix/nix-compat/src/store_path/utils.rs b/tvix/nix-compat/src/store_path/utils.rs index 2289594e3a..d054d59556 100644 --- a/tvix/nix-compat/src/store_path/utils.rs +++ b/tvix/nix-compat/src/store_path/utils.rs @@ -1,8 +1,8 @@ use crate::nixbase32; use crate::nixhash::{CAHash, NixHash}; use crate::store_path::{Error, StorePathRef, DIGEST_SIZE, STORE_DIR}; +use data_encoding::HEXLOWER; use sha2::{Digest, Sha256}; -use std::io::Write; use thiserror; /// Errors that can occur when creating a content-addressed store path. @@ -66,14 +66,11 @@ pub fn build_ca_path<'a, S: AsRef, I: IntoIterator>( return Err(BuildStorePathError::InvalidReference()); } - let (ty, hash) = match &ca_hash { - CAHash::Text(ref digest) => ( - make_references_string("text", references, false), - NixHash::Sha256(*digest), - ), + let (ty, inner_digest) = match &ca_hash { + CAHash::Text(ref digest) => (make_references_string("text", references, false), *digest), CAHash::Nar(NixHash::Sha256(ref digest)) => ( make_references_string("source", references, self_reference), - NixHash::Sha256(*digest), + *digest, ), // for all other CAHash::Nar, another custom scheme is used. @@ -84,7 +81,7 @@ pub fn build_ca_path<'a, S: AsRef, I: IntoIterator>( ( "output:out".to_string(), - NixHash::Sha256(fixed_out_digest("fixed:out:r", hash)), + fixed_out_digest("fixed:out:r", hash), ) } // CaHash::Flat is using something very similar, except the `r:` prefix. @@ -95,12 +92,12 @@ pub fn build_ca_path<'a, S: AsRef, I: IntoIterator>( ( "output:out".to_string(), - NixHash::Sha256(fixed_out_digest("fixed:out", hash)), + fixed_out_digest("fixed:out", hash), ) } }; - build_store_path_from_fingerprint_parts(&ty, &hash, name) + build_store_path_from_fingerprint_parts(&ty, &inner_digest, name) .map_err(BuildStorePathError::InvalidStorePath) } @@ -128,13 +125,13 @@ pub fn build_nar_based_store_path<'a>( /// Input-addresed store paths are always derivation outputs, the "input" in question is the /// derivation and its closure. pub fn build_output_path<'a>( - drv_hash: &NixHash, + drv_sha256: &[u8; 32], output_name: &str, output_path_name: &'a str, ) -> Result, Error> { build_store_path_from_fingerprint_parts( &(String::from("output:") + output_name), - drv_hash, + drv_sha256, output_path_name, ) } @@ -145,18 +142,20 @@ pub fn build_output_path<'a>( /// but other fingerprints starting with "output:" are also used in Derivation /// output path calculation. /// -/// The fingerprint is hashed with sha256, its digest is compressed to 20 bytes, -/// and nixbase32-encoded (32 characters). +/// The fingerprint is hashed with sha256, and its digest is compressed to 20 +/// bytes. +/// Inside a StorePath, that digest is printed nixbase32-encoded +/// (32 characters). fn build_store_path_from_fingerprint_parts<'a>( ty: &str, - hash: &NixHash, + inner_digest: &[u8; 32], name: &'a str, ) -> Result, Error> { - let digest: [u8; DIGEST_SIZE] = compress_hash(&{ - let mut h = Sha256::new(); - write!(h, "{ty}:{}:{STORE_DIR}:{name}", hash.to_nix_hex_string()).unwrap(); - h.finalize() - }); + let fingerprint = format!( + "{ty}:sha256:{}:{STORE_DIR}:{name}", + HEXLOWER.encode(inner_digest) + ); + let digest: [u8; DIGEST_SIZE] = compress_hash(&Sha256::new_with_prefix(fingerprint).finalize()); // name validation happens in here. StorePathRef::from_name_and_digest(name, &digest) -- cgit 1.4.1