about summary refs log tree commit diff
diff options
context:
space:
mode:
authorFlorian Klink <flokli@flokli.de>2023-01-04T21·26+0100
committerflokli <flokli@flokli.de>2023-01-06T12·25+0000
commit95bec264d55ddc4d1c9f53211c49899d93babd12 (patch)
tree464eb40ca6f9bc79c4640e342109938660ce89ed
parent9df9a2f1ab412848908efbabcb21ed6246263550 (diff)
feat(tvix/derivation): implement output path calculation r/5610
This implements output path calculation for derivations, both fixed-
output and non-fixed-output.

Change-Id: I0a77b99f2ba6b39467cc5dd589ce152a40387f9a
Reviewed-on: https://cl.tvl.fyi/c/depot/+/7761
Reviewed-by: tazjin <tazjin@tvl.su>
Tested-by: BuildkiteCI
Reviewed-by: jrhahn <mail.jhahn@gmail.com>
-rw-r--r--tvix/derivation/src/derivation.rs171
-rw-r--r--tvix/derivation/src/tests/mod.rs93
2 files changed, 262 insertions, 2 deletions
diff --git a/tvix/derivation/src/derivation.rs b/tvix/derivation/src/derivation.rs
index a9e769543e..2afe672e2e 100644
--- a/tvix/derivation/src/derivation.rs
+++ b/tvix/derivation/src/derivation.rs
@@ -1,9 +1,9 @@
 use crate::nix_hash;
-use crate::output::Output;
+use crate::output::{Hash, Output};
 use crate::write;
 use serde::{Deserialize, Serialize};
 use sha2::{Digest, Sha256};
-use std::{collections::BTreeMap, fmt, fmt::Write, iter::FromIterator};
+use std::{collections::BTreeMap, fmt, fmt::Write};
 use tvix_store::nixbase32::NIXBASE32;
 use tvix_store::nixpath::{NixPath, ParseNixPathError, STORE_DIR};
 
@@ -147,6 +147,173 @@ impl Derivation {
 
         build_store_path(true, &hasher.finalize(), name)
     }
+
+    /// Calculate the drv replacement string for a given derivation.
+    ///
+    /// This is either called on a struct without output paths populated,
+    /// to provide the `drv_replacement_str` value for the `calculate_output_paths`
+    /// function call, or called on a struct with output paths populated, to
+    /// calculate / cache lookups for calls to fn_get_drv_replacement.
+    ///
+    /// `fn_get_drv_replacement` is used to look up the drv replacement strings
+    /// for input_derivations the Derivation refers to.
+    pub fn calculate_drv_replacement_str<F>(&self, fn_get_drv_replacement: F) -> String
+    where
+        F: Fn(&str) -> String,
+    {
+        let mut hasher = Sha256::new();
+        let digest = match self.get_fixed_output() {
+            Some((fixed_output_path, fixed_output_hash)) => {
+                hasher.update("fixed:out:");
+                hasher.update(&fixed_output_hash.algo);
+                hasher.update(":");
+                hasher.update(&fixed_output_hash.digest);
+                hasher.update(":");
+                hasher.update(fixed_output_path);
+                hasher.finalize()
+            }
+            None => {
+                let mut replaced_input_derivations: BTreeMap<String, Vec<String>> = BTreeMap::new();
+
+                // For each input_derivation, look up the replacement.
+                for (drv_path, input_derivation) in &self.input_derivations {
+                    replaced_input_derivations.insert(
+                        fn_get_drv_replacement(drv_path).to_string(),
+                        input_derivation.to_vec(),
+                    );
+                }
+
+                // construct a new derivation struct with these replaced input derivation strings
+                let replaced_derivation = Derivation {
+                    input_derivations: replaced_input_derivations,
+                    ..self.clone()
+                };
+
+                // write the ATerm of that to the hash function
+                hasher.update(replaced_derivation.to_string());
+
+                hasher.finalize()
+            }
+        };
+
+        format!("{:x}", digest)
+    }
+
+    /// This calculates all output paths of a Derivation and updates the struct.
+    /// It requires the struct to be initially without output paths.
+    /// This means, self.outputs[$outputName].path needs to be an empty string,
+    /// and self.environment[$outputName] needs to be an empty string.
+    ///
+    /// Output path calculation requires knowledge of "drv replacement
+    /// strings", and in case of non-fixed-output derivations, also knowledge
+    /// of "drv replacement" strings (recursively) of all input derivations.
+    ///
+    /// We solve this by asking the caller of this function to provide
+    /// the drv replacement string of the current derivation itself,
+    /// which is calculated on the struct without output paths.
+    ///
+    /// This sounds terribly ugly, but won't be too much of a concern later on, as
+    /// naming fixed-output paths once uploaded will be a tvix-store concern,
+    /// so there's no need to calculate them here anymore.
+    ///
+    /// On completion, self.environment[$outputName] and
+    /// self.outputs[$outputName].path are set to the calculated output path for all
+    /// outputs.
+    pub fn calculate_output_paths(
+        &mut self,
+        name: &str,
+        drv_replacement_str: &str,
+    ) -> Result<(), ParseNixPathError> {
+        let mut hasher = Sha256::new();
+
+        // Check if the Derivation is fixed output, because they cause
+        // different fingerprints to be hashed.
+        match self.get_fixed_output() {
+            None => {
+                // The fingerprint and hash differ per output
+                for (output_name, output) in self.outputs.iter_mut() {
+                    // Assert that outputs are not yet populated, to avoid using this function wrongly.
+                    // We don't also go over self.environment, but it's a sufficient
+                    // footgun prevention mechanism.
+                    assert!(output.path.is_empty());
+
+                    hasher.update("output:");
+                    hasher.update(output_name);
+                    hasher.update(":sha256:");
+                    hasher.update(drv_replacement_str);
+                    hasher.update(":");
+                    hasher.update(tvix_store::nixpath::STORE_DIR);
+                    hasher.update(":");
+
+                    // calculate the output_path_name, which is the part of the NixPath after the digest.
+                    let mut output_path_name = name.to_string();
+                    if output_name != "out" {
+                        output_path_name.push('-');
+                        output_path_name.push_str(output_name);
+                    }
+
+                    hasher.update(output_path_name.as_str());
+
+                    let digest = hasher.finalize_reset();
+
+                    let abs_store_path = format!(
+                        "{}/{}",
+                        tvix_store::nixpath::STORE_DIR,
+                        build_store_path(false, &digest, &output_path_name)?
+                    );
+
+                    output.path = abs_store_path.clone();
+                    self.environment
+                        .insert(output_name.to_string(), abs_store_path);
+                }
+            }
+            Some((fixed_output_path, fixed_output_hash)) => {
+                // Assert that outputs are not yet populated, to avoid using this function wrongly.
+                // We don't also go over self.environment, but it's a sufficient
+                // footgun prevention mechanism.
+                assert!(fixed_output_path.is_empty());
+
+                let digest = {
+                    // Fixed-output derivation.
+                    // There are two different hashing strategies in place, depending on the value of hash.algo.
+                    // This code is _weird_ but it is what Nix is doing. See:
+                    // https://github.com/NixOS/nix/blob/1385b2007804c8a0370f2a6555045a00e34b07c7/src/libstore/store-api.cc#L178-L196
+                    if fixed_output_hash.algo == "r:sha256" {
+                        hasher.update("source:");
+                        hasher.update("sha256");
+                        hasher.update(":");
+                        hasher.update(fixed_output_hash.digest.clone()); // nixbase32
+                    } else {
+                        hasher.update("output:out:sha256:");
+                        // This is drv_replacement for FOD, with an empty fixed_output_path.
+                        hasher.update(drv_replacement_str);
+                    }
+                    hasher.update(":");
+                    hasher.update(tvix_store::nixpath::STORE_DIR);
+                    hasher.update(":");
+                    hasher.update(name);
+                    hasher.finalize()
+                };
+
+                let abs_store_path = format!(
+                    "{}/{}",
+                    tvix_store::nixpath::STORE_DIR,
+                    build_store_path(false, &digest, name)?
+                );
+
+                self.outputs.insert(
+                    "out".to_string(),
+                    Output {
+                        path: abs_store_path.clone(),
+                        hash: Some(fixed_output_hash.clone()),
+                    },
+                );
+                self.environment.insert("out".to_string(), abs_store_path);
+            }
+        };
+
+        Ok(())
+    }
 }
 
 impl fmt::Display for Derivation {
diff --git a/tvix/derivation/src/tests/mod.rs b/tvix/derivation/src/tests/mod.rs
index 623cc4e5b1..dbba6e5215 100644
--- a/tvix/derivation/src/tests/mod.rs
+++ b/tvix/derivation/src/tests/mod.rs
@@ -1,4 +1,5 @@
 use crate::derivation::Derivation;
+use crate::output::Output;
 use std::fs::File;
 use std::io::Read;
 use std::path::Path;
@@ -67,3 +68,95 @@ fn derivation_path(name: &str, expected_path: &str) {
         NixPath::from_string(expected_path).unwrap()
     );
 }
+
+/// This trims all outputs from a Derivation struct,
+/// by setting outputs[$outputName].path and environment[$outputName] to the empty string.
+fn derivation_with_trimmed_outputs(derivation: &Derivation) -> Derivation {
+    let mut trimmed_env = derivation.environment.clone();
+    let mut trimmed_outputs = derivation.outputs.clone();
+
+    for (output_name, output) in &derivation.outputs {
+        trimmed_env.insert(output_name.clone(), "".to_string());
+        assert!(trimmed_outputs.contains_key(output_name));
+        trimmed_outputs.insert(
+            output_name.to_string(),
+            Output {
+                path: "".to_string(),
+                ..output.clone()
+            },
+        );
+    }
+
+    // replace environment and outputs with the trimmed variants
+    Derivation {
+        environment: trimmed_env,
+        outputs: trimmed_outputs,
+        ..derivation.clone()
+    }
+}
+
+#[test_case("0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv", "724f3e3634fce4cbbbd3483287b8798588e80280660b9a63fd13a1bc90485b33"; "fixed_sha256")]
+#[test_case("ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv", "c79aebd0ce3269393d4a1fde2cbd1d975d879b40f0bf40a48f550edc107fd5df";"fixed-sha1")]
+fn replacement_drv_path(drv_path: &str, expected_replacement_str: &str) {
+    // read in the fixture
+    let data = read_file(&format!("{}/{}.json", RESOURCES_PATHS, drv_path));
+    let drv: Derivation = serde_json::from_str(&data).expect("must deserialize");
+
+    let drv_replacement_str = drv.calculate_drv_replacement_str(|_| panic!("must not be called"));
+
+    assert_eq!(expected_replacement_str, drv_replacement_str);
+}
+
+#[test_case("bar","0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv"; "fixed_sha256")]
+#[test_case("foo", "4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv"; "simple-sha256")]
+#[test_case("bar", "ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv"; "fixed-sha1")]
+#[test_case("foo", "ch49594n9avinrf8ip0aslidkc4lxkqv-foo.drv"; "simple-sha1")]
+#[test_case("has-multi-out", "h32dahq0bx5rp1krcdx3a53asj21jvhk-has-multi-out.drv"; "multiple-outputs")]
+#[test_case("structured-attrs", "9lj1lkjm2ag622mh4h9rpy6j607an8g2-structured-attrs.drv"; "structured-attrs")]
+#[test_case("unicode", "52a9id8hx688hvlnz4d1n25ml1jdykz0-unicode.drv"; "unicode")]
+fn output_paths(name: &str, drv_path: &str) {
+    // read in the fixture
+    let data = read_file(&format!("{}/{}.json", RESOURCES_PATHS, drv_path));
+    let expected_derivation: Derivation = serde_json::from_str(&data).expect("must deserialize");
+
+    let mut derivation = derivation_with_trimmed_outputs(&expected_derivation);
+
+    // calculate the drv replacement string.
+    // We don't expect the lookup function to be called for most derivations.
+    let replacement_str = derivation.calculate_drv_replacement_str(|drv_name| {
+        // 4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv may look up /nix/store/0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv
+        // ch49594n9avinrf8ip0aslidkc4lxkqv-foo.drv may look up /nix/store/ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv
+        if name == "foo"
+            && ((drv_path == "4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv"
+                && drv_name == "/nix/store/0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv")
+                || (drv_path == "ch49594n9avinrf8ip0aslidkc4lxkqv-foo.drv"
+                    && drv_name == "/nix/store/ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv"))
+        {
+            // do the lookup, by reading in the fixture of the requested
+            // drv_name, and calculating its drv replacement (on the non-stripped version)
+            // In a real-world scenario you would have already done this during construction.
+
+            let data = read_file(&format!(
+                "{}/{}.json",
+                RESOURCES_PATHS,
+                Path::new(drv_name).file_name().unwrap().to_string_lossy()
+            ));
+
+            let drv: Derivation = serde_json::from_str(&data).expect("must deserialize");
+
+            // calculate replacement string. These don't trigger any subsequent requests, as they're both FOD.
+            drv.calculate_drv_replacement_str(|_| panic!("must not lookup"))
+        } else {
+            // we only expect this to be called in the "foo" testcase, for the "bar derivations"
+            panic!("may only be called for foo testcase on bar derivations");
+        }
+    });
+
+    // We need to calculate the replacement_str, as fixed-sha1 does use it.
+    derivation
+        .calculate_output_paths(&name, &replacement_str)
+        .unwrap();
+
+    // The derivation should now look like it was before
+    assert_eq!(expected_derivation, derivation);
+}