118 files changed, 4572 insertions, 2395 deletions
diff --git a/ops/modules/www/tazj.in.nix b/ops/modules/www/tazj.in.nix
index ea3cf1dc44..47eefca2a6 100644
--- a/ops/modules/www/tazj.in.nix
+++ b/ops/modules/www/tazj.in.nix
@@ -35,7 +35,7 @@
 
       # redirect for easier entry on a TV
       location = /tv {
-        return 302 https://tazj.in/blobs/play.html
+        return 302 https://tazj.in/blobs/play.html;
       }
 
       # Temporary place for serving static files.
diff --git a/ops/users/default.nix b/ops/users/default.nix
index c54a681dce..a2182a5460 100644
--- a/ops/users/default.nix
+++ b/ops/users/default.nix
@@ -229,4 +229,9 @@
     email = "tvl@alice-carroll.pet";
     password = "{ARGON2}$argon2id$v=19$m=19456,t=2,p=1$mt/0RzKw4RHxm7ybpMHP5Q$P/SDBMv5si9D98NFO/eZgh2+InlByqYxqAvQWhl+p0c";
   }
+  {
+    username = "yuka";
+    email = "tvl@yuka.dev";
+    password = "{ARGON2}$argon2id$v=19$m=65536,t=2,p=1$aEyiAIuynQMwfY7xE+pMxg$QdghylHO2JZMR/YyYf4UAnhhb/gBdAkoDeANEwdixxU";
+  }
 ]
diff --git a/third_party/overlays/tvl.nix b/third_party/overlays/tvl.nix
index 23f56e2f98..b54e899b88 100644
--- a/third_party/overlays/tvl.nix
+++ b/third_party/overlays/tvl.nix
@@ -97,44 +97,12 @@ depot.nix.readTree.drvTargets {
     ];
   });
 
-  crate2nix = super.rustPlatform.buildRustPackage rec {
-    pname = "crate2nix";
-    version = "0.13.0";
-
-    src = super.fetchFromGitHub {
-      owner = "nix-community";
-      repo = "crate2nix";
-      rev = "ceb06eb7e76afb9e01a5f069aae136f97df72730";
-      hash = "sha256-JTMe8GViCQt51WUiaaoIPmWtwEeeYrl6pBxo2DNuKig=";
-    };
-
-    patches = [
+  crate2nix = super.crate2nix.overrideAttrs (old: {
+    patches = old.patches or [ ] ++ [
+      # https://github.com/nix-community/crate2nix/pull/301
       ./patches/crate2nix-tests-debug.patch
-      ./patches/crate2nix-run-tests-in-build-source.patch
     ];
-
-    sourceRoot = "${src.name}/crate2nix";
-
-    cargoHash = "sha256-dhlSXY1CJE+JJt+6Y7W1MVMz36nwr6ny543py1TcjyY=";
-
-    nativeBuildInputs = [ super.makeWrapper ];
-
-    # Tests use nix(1), which tries (and fails) to set up /nix/var inside the
-    # sandbox
-    doCheck = false;
-
-    postFixup = ''
-      wrapProgram $out/bin/crate2nix \
-        --suffix PATH ":" ${lib.makeBinPath (with self; [ cargo nix_latest nix-prefetch-git ])}
-
-      rm -rf $out/lib $out/bin/crate2nix.d
-      mkdir -p \
-        $out/share/bash-completion/completions \
-        $out/share/zsh/vendor-completions
-      $out/bin/crate2nix completions -s 'bash' -o $out/share/bash-completion/completions
-      $out/bin/crate2nix completions -s 'zsh' -o $out/share/zsh/vendor-completions
-    '';
-  };
+  });
 
   evans = super.evans.overrideAttrs (old: {
     patches = old.patches or [ ] ++ [
diff --git a/third_party/sources/sources.json b/third_party/sources/sources.json
index 5a6bae4866..109451ff51 100644
--- a/third_party/sources/sources.json
+++ b/third_party/sources/sources.json
@@ -17,10 +17,10 @@
         "homepage": "https://nix-community.github.io/home-manager/",
         "owner": "nix-community",
        "repo": "home-manager",
-        "rev": "b787726a8413e11b074cde42704b4af32d95545c",
-        "sha256": "0amclig8lqn7ylb1r38yni4v4r1mf5m0qih7n2lvm8azjrybxfkr",
+        "rev": "c1609d584a6b5e9e6a02010f51bd368cb4782f8e",
+        "sha256": "112r86p3iah1xahwlp82yd3gvh10wkf271za5h7v3jsqv08c6gkr",
         "type": "tarball",
-        "url": "https://github.com/nix-community/home-manager/archive/b787726a8413e11b074cde42704b4af32d95545c.tar.gz",
+        "url": "https://github.com/nix-community/home-manager/archive/c1609d584a6b5e9e6a02010f51bd368cb4782f8e.tar.gz",
        "url_template": "https://github.com/<owner>/<repo>/archive/<rev>.tar.gz"
    },
    "impermanence": {
@@ -41,10 +41,10 @@
        "homepage": "",
        "owner": "nmattia",
        "repo": "naersk",
-        "rev": "aeb58d5e8faead8980a807c840232697982d47b9",
-        "sha256": "185wg4p67krrjd8dx5h9pc381z7677nfzsdyp54kg3niqcf5wdzx",
+        "rev": "c5037590290c6c7dae2e42e7da1e247e54ed2d49",
+        "sha256": "1ql5ziwfrpmc8cxhgflmdy2z06z4dsdfzjwb2vv9bag6a2chrvq8",
        "type": "tarball",
-        "url": "https://github.com/nmattia/naersk/archive/aeb58d5e8faead8980a807c840232697982d47b9.tar.gz",
+        "url": "https://github.com/nmattia/naersk/archive/c5037590290c6c7dae2e42e7da1e247e54ed2d49.tar.gz",
        "url_template": "https://github.com/<owner>/<repo>/archive/<rev>.tar.gz"
    },
    "napalm": {
@@ -65,10 +65,10 @@
        "homepage": "",
        "owner": "NixOS",
        "repo": "nixpkgs",
-        "rev": "fd281bd6b7d3e32ddfa399853946f782553163b5",
-        "sha256": "1hy81yj2dcg6kfsm63xcqf8kvigxglim1rcg1xpmy2rb6a8vqvsj",
+        "rev": "7bb2ccd8cdc44c91edba16c48d2c8f331fb3d856",
+        "sha256": "0ijqx995jw9i16f28whyjdll9b0nydmyl4n91bci2cgryxms7f8f",
        "type": "tarball",
-        "url": "https://github.com/NixOS/nixpkgs/archive/fd281bd6b7d3e32ddfa399853946f782553163b5.tar.gz",
+        "url": "https://github.com/NixOS/nixpkgs/archive/7bb2ccd8cdc44c91edba16c48d2c8f331fb3d856.tar.gz",
        "url_template": "https://github.com/<owner>/<repo>/archive/<rev>.tar.gz"
    },
    "nixpkgs-stable": {
@@ -77,10 +77,10 @@
        "homepage": "",
        "owner": "NixOS",
        "repo": "nixpkgs",
-        "rev": "72da83d9515b43550436891f538ff41d68eecc7f",
-        "sha256": "177sws22nqkvv8am76qmy9knham2adfh3gv7hrjf6492z1mvy02y",
+        "rev": "dd37924974b9202f8226ed5d74a252a9785aedf8",
+        "sha256": "1nxd4dqci8rs94a7cypx30axgj778p2wydkx16q298n29crkflbw",
        "type": "tarball",
-        "url": "https://github.com/NixOS/nixpkgs/archive/72da83d9515b43550436891f538ff41d68eecc7f.tar.gz",
+        "url": "https://github.com/NixOS/nixpkgs/archive/dd37924974b9202f8226ed5d74a252a9785aedf8.tar.gz",
        "url_template": "https://github.com/<owner>/<repo>/archive/<rev>.tar.gz"
    },
    "rust-overlay": {
@@ -89,10 +89,10 @@
        "homepage": "",
        "owner": "oxalica",
        "repo": "rust-overlay",
-        "rev": "41b3b080cc3e4b3a48e933b87fc15a05f1870779",
-        "sha256": "13xp3bsgwpld8bkh5sjkigxcy5nz336hyc9xssk58glpgf1sxddm",
+        "rev": "2a42c742ab04b61d9b2f1edf392842cf9f27ebfd",
+        "sha256": "1wpkca75ysb2ssycc0dshd1m76q8iqhzrrbr6xmfmkkcj1p333nk",
        "type": "tarball",
-        "url": "https://github.com/oxalica/rust-overlay/archive/41b3b080cc3e4b3a48e933b87fc15a05f1870779.tar.gz",
+        "url": "https://github.com/oxalica/rust-overlay/archive/2a42c742ab04b61d9b2f1edf392842cf9f27ebfd.tar.gz",
        "url_template": "https://github.com/<owner>/<repo>/archive/<rev>.tar.gz"
    },
    "rustsec-advisory-db": {
@@ -101,10 +101,10 @@
        "homepage": "https://rustsec.org",
        "owner": "RustSec",
        "repo": "advisory-db",
-        "rev": "0bc9a77248be5cb5f2b51fe6aba8ba451d74c6bb",
-        "sha256": "1fmgz6a2b63yy5cn2ghbqj8l0pdb2rwr5agr1m4mzaydlyypx26m",
+        "rev": "35e7459a331d3e0c585e56dabd03006b9b354088",
+        "sha256": "1j8c0vzwg6b9lxmdy2a40pvwsy2kncv455spbjbxsj10p2vmy5fl",
        "type": "tarball",
-        "url": "https://github.com/RustSec/advisory-db/archive/0bc9a77248be5cb5f2b51fe6aba8ba451d74c6bb.tar.gz",
+        "url": "https://github.com/RustSec/advisory-db/archive/35e7459a331d3e0c585e56dabd03006b9b354088.tar.gz",
        "url_template": "https://github.com/<owner>/<repo>/archive/<rev>.tar.gz"
    }
 }
diff --git a/tools/when/default.nix b/tools/when/default.nix
new file mode 100644
index 0000000000..1aee5e1ea8
--- /dev/null
+++ b/tools/when/default.nix
@@ -0,0 +1,6 @@
+{ depot, ... }:
+
+depot.nix.buildGo.program {
+  name = "when";
+  srcs = [ ./when.go ];
+}
diff --git a/tools/when/when.go b/tools/when/when.go
new file mode 100644
index 0000000000..a2ac494e8c
--- /dev/null
+++ b/tools/when/when.go
@@ -0,0 +1,206 @@
+package main
+
+import (
+    "fmt"
+    "os"
+    "strconv"
+    "strings"
+    "time"
+)
+
+const usage = `usage: when <time>
+
+This program converts the given time into various formats (currently a local
+timestamp, UTC timestamp, and UNIX epoch). It tries to guess what the input is.
+
+Some valid queries:
+
+  2024-01-05
+  1715079241
+  tomorrow 5PM
+  -22h
+  -7h10m
+  Mar 15
+  Sep 3 18:00
+
+For now a single timestamp and a single duration (which is added either to the
+current time, or the given time) is supported.`
+
+func printTime(t time.Time) {
+    fmt.Println("Local:", t.Format("Mon 02 January 2006 at 15:04:05 MST"))
+    fmt.Println("UTC:  ", t.UTC().Format(time.RFC3339))
+    fmt.Println("UNIX: ", t.Unix())
+}
+
+type FieldSet uint8
+
+const (
+    SetYear FieldSet = 1 << iota
+    SetDay
+    SetMonth
+    SetHour
+    SetMinute
+    SetSecond
+    SetLocation
+)
+
+const (
+    SetDate  = SetYear | SetDay | SetMonth
+    SetClock = SetHour | SetMinute | SetSecond
+)
+
+// mergeTimes returns a new time.Time with all fields in this overridden with the
+// specified fields from that.
+func mergeTimes(this time.Time, that time.Time, set FieldSet) time.Time {
+    year, month, day := this.Date()
+    hour, min, sec := this.Clock()
+    loc := this.Location()
+
+    if set&SetYear == SetYear {
+        year = that.Year()
+    }
+    if set&SetMonth == SetMonth {
+        month = that.Month()
+    }
+    if set&SetDay == SetDay {
+        day = that.Day()
+    }
+    if set&SetHour == SetHour {
+        hour = that.Hour()
+    }
+    if set&SetMinute == SetMinute {
+        min = that.Minute()
+    }
+    if set&SetSecond == SetSecond {
+        sec = that.Second()
+    }
+    if set&SetLocation == SetLocation {
+        loc = that.Location()
+    }
+
+    return time.Date(year, month, day, hour, min, sec, 0, loc)
+}
+
+func parseTime(input string) (time.Time, error) {
+    // try unix times
+    if i, err := strconv.ParseInt(input, 10, 64); err == nil {
+        if i < 9999999999 {
+            return time.Unix(i, 0), nil
+        }
+        if i < 9999999999999 {
+            return time.UnixMilli(i), nil
+        }
+    }
+
+    // try simple date/time formats
+    if t, err := time.Parse(time.DateOnly, input); err == nil {
+        return t, nil
+    }
+
+    if t, err := time.Parse(time.Kitchen, input); err == nil {
+        now := time.Now()
+        return mergeTimes(now, t, SetClock), nil
+    }
+
+    if t, err := time.Parse(time.TimeOnly, input); err == nil {
+        now := time.Now()
+        return mergeTimes(now, t, SetClock), nil
+    }
+
+    if t, err := time.Parse("15:04", input); err == nil {
+        now := time.Now()
+        return mergeTimes(now, t, SetClock), nil
+    }
+
+    if t, err := time.Parse("3PM", input); err == nil {
+        now := time.Now()
+        return mergeTimes(now, t, SetClock), nil
+    }
+
+    if t, err := time.Parse(time.DateTime, input); err == nil {
+        return t, nil
+    }
+
+    if t, err := time.Parse(time.Stamp, input); err == nil {
+        now := time.Now()
+        return mergeTimes(t, now, SetYear|SetLocation), nil
+    }
+
+    if t, err := time.Parse("Jan _2 15:04", input); err == nil {
+        now := time.Now()
+        return mergeTimes(t, now, SetYear|SetLocation), nil
+    }
+
+    if t, err := time.Parse("Jan _2", input); err == nil {
+        now := time.Now()
+        return mergeTimes(t, now, SetYear|SetLocation), nil
+    }
+
+    return time.Time{}, fmt.Errorf("could not parse time: %q", input)
+}
+
+func parseDuration(input string) (time.Duration, error) {
+    // some simple rewriting
+    switch input {
+    case "yesterday":
+        input = "-24h"
+    case "tomorrow":
+        input = "24h"
+    case "today", "now":
+        return time.Duration(0), nil
+    }
+
+    // TODO: days, months, weeks, ...
+    return time.ParseDuration(input)
+}
+
+func main() {
+    if len(os.Args) < 2 {
+        fmt.Fprintln(os.Stderr, usage)
+        os.Exit(1)
+    }
+
+    var d time.Duration
+    var t time.Time
+    var err error
+    var haveTime, haveDuration bool
+
+    // Try to parse entire input as one full thing, before getting more
+    // clever.
+    if t, err = parseTime(strings.Join(os.Args[1:], " ")); err == nil {
+        printTime(t)
+        return
+    }
+
+    for _, arg := range os.Args[1:] {
+        if !haveTime {
+            if t, err = parseTime(arg); err == nil {
+                haveTime = true
+                continue
+            }
+        }
+
+        if !haveDuration {
+            if d, err = parseDuration(arg); err == nil {
+                haveDuration = true
+                continue
+            }
+        }
+    }
+
+    if err != nil {
+        fmt.Fprintln(os.Stderr, "Not sure what you want, try another time.")
+        os.Exit(1)
+    }
+
+    if haveTime && haveDuration {
+        printTime(t.Add(d))
+    } else if haveTime {
+        printTime(t)
+    } else if haveDuration {
+        printTime(time.Now().Add(d))
+    } else {
+        fmt.Fprintln(os.Stderr, "Not sure what you want, try another time.")
+        os.Exit(1)
+    }
+}
diff --git a/tvix/Cargo.lock b/tvix/Cargo.lock
index 334b69b7f5..92229302dd 100644
--- a/tvix/Cargo.lock
+++ b/tvix/Cargo.lock
@@ -18,6 +18,18 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
 
 [[package]]
+name = "ahash"
+version = "0.8.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011"
+dependencies = [
+ "cfg-if",
+ "once_cell",
+ "version_check",
+ "zerocopy",
+]
+
+[[package]]
 name = "aho-corasick"
 version = "1.1.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -27,6 +39,12 @@ dependencies = [
  "memchr",
 ]
 
 [[package]]
+name = "allocator-api2"
+version = "0.2.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f"
+
+[[package]]
 name = "android-tzdata"
 version = "0.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -134,9 +152,9 @@ dependencies = [
 [[package]]
 name = "async-compression"
-version = "0.4.6"
+version = "0.4.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a116f46a969224200a0a97f29cfd4c50e7534e4b4826bd23ea2c3c533039c82c"
+checksum = "4e9eabd7a98fe442131a17c316bd9349c43695e49e730c3c8e12cfb5f4da2693"
 dependencies = [
  "bzip2",
  "flate2",
@@ -145,6 +163,8 @@
  "pin-project-lite",
  "tokio",
  "xz2",
+ "zstd",
+ "zstd-safe",
 ]
 
 [[package]]
@@ -205,17 +225,6 @@ dependencies = [
 ]
 
 [[package]]
-name = "async-recursion"
-version = "1.0.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5fd55a5ba1179988837d24ab4c7cc8ed6efdeff578ede0416b4225a5fca35bd0"
-dependencies = [
- "proc-macro2",
- "quote",
- "syn 2.0.48",
-]
-
-[[package]]
 name = "async-signal"
 version = "0.2.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -335,7 +344,7 @@ dependencies = [
  "http 1.1.0",
  "http-body 1.0.0",
  "http-body-util",
- "hyper 1.2.0",
+ "hyper 1.3.1",
  "hyper-util",
  "itoa",
  "matchit",
@@ -1400,7 +1409,7 @@ dependencies = [
  "hyper 0.14.28",
  "hyper-rustls",
  "ring",
- "rustls 0.21.10",
+ "rustls 0.21.12",
  "rustls-pemfile 1.0.4",
  "serde",
  "serde_json",
@@ -1462,9 +1471,9 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
 
 [[package]]
 name = "h2"
-version = "0.3.24"
+version = "0.3.26"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bb2c4422095b67ee78da96fbb51a4cc413b3b25883c7717ff7ca1ab31022c9c9"
+checksum = "81fe527a889e1532da5c525686d96d4c2e74cdd345badf8dfef9f6b39dd5f5e8"
 dependencies = [
  "bytes",
  "fnv",
@@ -1481,9 +1490,9 @@
 
 [[package]]
 name = "h2"
-version = "0.4.3"
+version = "0.4.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "51ee2dd2e4f378392eeff5d51618cd9a63166a2513846bbc55f21cfacd9199d4"
+checksum = "816ec7294445779408f36fe57bc5b7fc1cf59664059096c65f905c1c61f58069"
 dependencies = [
  "bytes",
  "fnv",
@@ -1515,6 +1524,10 @@
 name = "hashbrown"
 version = "0.14.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604"
+dependencies = [
+ "ahash",
+ "allocator-api2",
+]
 
 [[package]]
 name = "heck"
@@ -1633,7 +1646,7 @@ dependencies = [
  "futures-channel",
  "futures-core",
  "futures-util",
- "h2 0.3.24",
+ "h2 0.3.26",
  "http 0.2.11",
  "http-body 0.4.6",
  "httparse",
@@ -1649,14 +1662,14 @@
 
 [[package]]
 name = "hyper"
-version = "1.2.0"
+version = "1.3.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "186548d73ac615b32a73aafe38fb4f56c0d340e110e5a200bcadbaf2e199263a"
+checksum = "fe575dd17d0862a9a33781c8c4696a55c320909004a67a00fb286ba8b1bc496d"
 dependencies = [
  "bytes",
  "futures-channel",
  "futures-util",
- "h2 0.4.3",
+ "h2 0.4.4",
  "http 1.1.0",
  "http-body 1.0.0",
  "httparse",
@@ -1676,7 +1689,7 @@ dependencies = [
  "futures-util",
  "http 0.2.11",
  "hyper 0.14.28",
- "rustls 0.21.10",
+ "rustls 0.21.12",
  "rustls-native-certs 0.6.3",
  "tokio",
  "tokio-rustls 0.24.1",
@@ -1704,7 +1717,7 @@ dependencies = [
  "futures-util",
  "http 1.1.0",
  "http-body 1.0.0",
- "hyper 1.2.0",
+ "hyper 1.3.1",
  "pin-project-lite",
  "socket2",
  "tokio",
@@ -2000,6 +2013,15 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f"
 
 [[package]]
+name = "lru"
+version = "0.12.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d3262e75e648fce39813cb56ac41f3c3e3f65217ebf3844d818d1f9398cfb0dc"
+dependencies = [
+ "hashbrown 0.14.3",
+]
+
+[[package]]
 name = "lzma-sys"
 version = "0.1.20"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -2104,9 +2126,9 @@ dependencies = [
 
 [[package]]
 name = "mio"
-version = "0.8.10"
+version = "0.8.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8f3d0b296e374a4e6f3c7b0a1f5a51d748a0d34c85e7dc48fc3fa9a87657fe09"
+checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c"
 dependencies = [
  "libc",
  "log",
@@ -2283,7 +2305,7 @@ dependencies = [
  "hyper 0.14.28",
  "itertools 0.12.0",
  "md-5",
- "parking_lot 0.12.1",
+ "parking_lot 0.12.2",
  "percent-encoding",
  "quick-xml",
  "rand",
@@ -2438,9 +2460,9 @@ dependencies = [
 
 [[package]]
 name = "parking_lot"
-version = "0.12.1"
+version = "0.12.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f"
+checksum = "7e4af0ca4f6caed20e900d564c242b8e5d4903fdacf31d3daf527b66fe6f42fb"
 dependencies = [
  "lock_api",
  "parking_lot_core 0.9.9",
@@ -3010,7 +3032,7 @@ dependencies = [
  "encoding_rs",
  "futures-core",
  "futures-util",
- "h2 0.3.24",
+ "h2 0.3.26",
  "http 0.2.11",
  "http-body 0.4.6",
  "hyper 0.14.28",
@@ -3022,7 +3044,7 @@ dependencies = [
  "once_cell",
  "percent-encoding",
  "pin-project-lite",
- "rustls 0.21.10",
+ "rustls 0.21.12",
  "rustls-native-certs 0.6.3",
  "rustls-pemfile 1.0.4",
  "serde",
@@ -3154,9 +3176,9 @@ dependencies = [
 
 [[package]]
 name = "rustls"
-version = "0.21.10"
+version = "0.21.12"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f9d5a6813c0759e4609cd494e8e725babae6a2ca7b62a5536a13daaec6fcb7ba"
+checksum = "3f56a14d1f48b391359b22f731fd4bd7e43c97f3c50eee276f3aa09c94784d3e"
 dependencies = [
  "log",
  "ring",
@@ -3166,9 +3188,9 @@
 
 [[package]]
 name = "rustls"
-version = "0.22.2"
+version = "0.22.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e87c9956bd9807afa1f77e0f7594af32566e830e088a5576d27c5b6f30f49d41"
+checksum = "bf4ef73721ac7bcd79b2b315da7779d8fc09718c6b3d2d1b2d94850eb8c18432"
 dependencies = [
  "log",
  "ring",
@@ -3905,7 +3927,7 @@ version = "0.24.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081"
 dependencies = [
- "rustls 0.21.10",
+ "rustls 0.21.12",
  "tokio",
 ]
 
@@ -3915,7 +3937,7 @@ version = "0.25.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "775e0c0f0adb3a2f22a00c4745d728b479985fc15ee7ca6a2608388c5569860f"
 dependencies = [
- "rustls 0.22.2",
+ "rustls 0.22.4",
  "rustls-pki-types",
  "tokio",
 ]
@@ -4020,7 +4042,7 @@ dependencies = [
  "bytes",
  "futures-core",
  "futures-util",
- "h2 0.3.24",
+ "h2 0.3.26",
  "http 0.2.11",
  "http-body 0.4.6",
  "hyper 0.14.28",
@@ -4047,7 +4069,7 @@ dependencies = [
  "axum 0.6.20",
  "base64",
  "bytes",
- "h2 0.3.24",
+ "h2 0.3.26",
  "http 0.2.11",
  "http-body 0.4.6",
  "hyper 0.14.28",
@@ -4276,7 +4298,7 @@ dependencies = [
  "lazy_static",
  "libc",
  "object_store",
- "parking_lot 0.12.1",
+ "parking_lot 0.12.2",
  "petgraph",
  "pin-project-lite",
  "prost 0.12.3",
@@ -4385,7 +4407,6 @@ name = "tvix-glue"
 version = "0.1.0"
 dependencies = [
  "async-compression",
- "async-recursion",
  "bstr",
  "bytes",
  "criterion",
@@ -4434,8 +4455,8 @@ name = "tvix-store"
 version = "0.1.0"
 dependencies = [
  "anyhow",
+ "async-compression",
  "async-process",
- "async-recursion",
  "async-stream",
  "bigtable_rs",
  "blake3",
@@ -4446,10 +4467,12 @@
  "data-encoding",
  "futures",
  "lazy_static",
+ "lru",
  "nix-compat",
  "opentelemetry",
  "opentelemetry-otlp",
  "opentelemetry_sdk",
+ "parking_lot 0.12.2",
  "pin-project-lite",
  "prost 0.12.3",
  "prost-build",
@@ -4479,7 +4502,6 @@
  "tvix-castore",
  "url",
  "walkdir",
- "xz2",
 ]
 
 [[package]]
@@ -5066,6 +5088,26 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec"
 
 [[package]]
+name = "zerocopy"
+version = "0.7.34"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ae87e3fcd617500e5d106f0380cf7b77f3c6092aae37191433159dda23cfb087"
+dependencies = [
+ "zerocopy-derive",
+]
+
+[[package]]
+name = "zerocopy-derive"
+version = "0.7.34"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "15e934569e47891f7d9411f1a451d947a60e000ab3bd24fbb970f000387d1b3b"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.48",
+]
+
+[[package]]
 name = "zeroize"
 version = "1.7.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
diff --git a/tvix/Cargo.nix b/tvix/Cargo.nix
index 51d47b05e3..32f0c8c8e9 100644
--- a/tvix/Cargo.nix
+++ b/tvix/Cargo.nix
@@ -1,4 +1,4 @@
-# This file was @generated by crate2nix 0.13.0 with the command:
+# This file was @generated by crate2nix 0.14.0 with the command:
 #   "generate" "--all-features"
 # See https://github.com/kolloch/crate2nix for more info.
 
@@ -193,6 +193,49 @@
        "rustc-dep-of-std" = [ "core" "compiler_builtins" ];
      };
    };
+    "ahash" = rec {
+      crateName = "ahash";
+      version = "0.8.11";
+      edition = "2018";
+      sha256 = "04chdfkls5xmhp1d48gnjsmglbqibizs3bpbj6rsj604m10si7g8";
+      authors = [
+        "Tom Kaitchuck <Tom.Kaitchuck@gmail.com>"
+      ];
+      dependencies = [
+        {
+          name = "cfg-if";
+          packageId = "cfg-if";
+        }
+        {
+          name = "once_cell";
+          packageId = "once_cell";
+          usesDefaultFeatures = false;
+          target = { target, features }: (!(("arm" == target."arch" or null) && ("none" == target."os" or null)));
+          features = [ "alloc" ];
+        }
+        {
+          name = "zerocopy";
+          packageId = "zerocopy";
+          usesDefaultFeatures = false;
+          features = [ "simd" ];
+        }
+      ];
+      buildDependencies = [
+        {
+          name = "version_check";
+          packageId = "version_check";
+        }
+      ];
+      features = {
+        "atomic-polyfill" = [ "dep:atomic-polyfill" "once_cell/atomic-polyfill" ];
+        "compile-time-rng" = [ "const-random" ];
+        "const-random" = [ "dep:const-random" ];
+        "default" = [ "std" "runtime-rng" ];
+        "getrandom" = [ "dep:getrandom" ];
+        "runtime-rng" = [ "getrandom" ];
+        "serde" = [ "dep:serde" ];
+      };
+    };
    "aho-corasick" = rec {
      crateName = "aho-corasick";
      version = "1.1.2";
@@ -218,6 +261,21 @@
      };
      resolvedDefaultFeatures = [ "default" "perf-literal" "std" ];
    };
+    "allocator-api2" = rec {
+      crateName = "allocator-api2";
+      version = "0.2.18";
+      edition = "2018";
+      sha256 = "0kr6lfnxvnj164j1x38g97qjlhb7akppqzvgfs0697140ixbav2w";
+      authors = [
+        "Zakarum <zaq.dev@icloud.com>"
+      ];
+      features = {
+        "default" = [ "std" ];
+        "serde" = [ "dep:serde" ];
+        "std" = [ "alloc" ];
+      };
+      resolvedDefaultFeatures = [ "alloc" ];
+    };
    "android-tzdata" = rec {
      crateName = "android-tzdata";
      version = "0.1.1";
@@ -454,9 +512,9 @@
    };
    "async-compression" = rec {
      crateName = "async-compression";
-      version = "0.4.6";
+      version = "0.4.9";
      edition = "2018";
-      sha256 = "0b6874q56g1cx8ivs9j89d757rsh9kyrrwlp1852094jjrmg85m1";
+      sha256 = "14r6vbsbbkqjiqy0qwwywjakdi29jfyidhqp389l5r4gm7bsp7jf";
      authors = [
        "Wim Looman <wim@nemo157.com>"
        "Allen Bui <fairingrey@gmail.com>"
@@ -496,6 +554,27 @@
          packageId = "xz2";
          optional = true;
        }
+        {
+          name = "zstd";
+          packageId = "zstd";
+          rename = "libzstd";
+          optional = true;
+          usesDefaultFeatures = false;
+        }
+        {
+          name = "zstd-safe";
+          packageId = "zstd-safe";
+          optional = true;
+          usesDefaultFeatures = false;
+        }
+      ];
+      devDependencies = [
+        {
+          name = "tokio";
+          packageId = "tokio";
+          usesDefaultFeatures = false;
+          features = [ "io-util" "macros" "rt-multi-thread" "io-std" ];
+        }
      ];
      features = {
        "all" = [ "all-implementations" "all-algorithms" ];
@@ -518,7 +597,7 @@
        "zstd-safe" = [ "dep:zstd-safe" ];
        "zstdmt" = [ "zstd" "zstd-safe/zstdmt" ];
      };
-      resolvedDefaultFeatures = [ "bzip2" "flate2" "gzip" "tokio" "xz" "xz2" ];
+      resolvedDefaultFeatures = [ "bzip2" "flate2" "gzip" "libzstd" "tokio" "xz" "xz2" "zstd" "zstd-safe" ];
    };
    "async-io" = rec {
      crateName = "async-io";
@@ -698,35 +777,6 @@
      ];
 
    };
-    "async-recursion" = rec {
-      crateName = "async-recursion";
-      version = "1.0.5";
-      edition = "2018";
-      sha256 = "1l2vlgyaa9a2dd0y1vbqyppzsvpdr1y4rar4gn1qi68pl5dmmmaz";
-      procMacro = true;
-      authors = [
-        "Robert Usher <266585+dcchut@users.noreply.github.com>"
-      ];
-      dependencies = [
-        {
-          name = "proc-macro2";
-          packageId = "proc-macro2";
-          usesDefaultFeatures = false;
-        }
-        {
-          name = "quote";
-          packageId = "quote";
-          usesDefaultFeatures = false;
-        }
-        {
-          name = "syn";
-          packageId = "syn 2.0.48";
-          usesDefaultFeatures = false;
-          features = [ "full" "parsing" "printing" "proc-macro" "clone-impls" ];
-        }
-      ];
-
-    };
    "async-signal" = rec {
      crateName = "async-signal";
      version = "0.2.5";
@@ -1110,7 +1160,7 @@
        }
        {
          name = "hyper";
-          packageId = "hyper 1.2.0";
+          packageId = "hyper 1.3.1";
          optional = true;
        }
        {
@@ -4299,7 +4349,7 @@
        }
        {
          name = "rustls";
-          packageId = "rustls 0.21.10";
+          packageId = "rustls 0.21.12";
        }
        {
          name = "rustls-pemfile";
@@ -4477,11 +4527,11 @@
      ];
 
    };
-    "h2 0.3.24" = rec {
+    "h2 0.3.26" = rec {
      crateName = "h2";
-      version = "0.3.24";
+      version = "0.3.26";
      edition = "2018";
-      sha256 = "1jf9488b66nayxzp3iw3b2rb64y49hdbbywnv9wfwrsv14i48b5v";
+      sha256 = "1s7msnfv7xprzs6xzfj5sg6p8bjcdpcqcmjjbkd345cyi1x55zl1";
      authors = [
        "Carl Lerche <me@carllerche.com>"
        "Sean McArthur <sean@seanmonstar.com>"
@@ -4549,11 +4599,11 @@
      ];
      features = { };
    };
-    "h2 0.4.3" = rec {
+    "h2 0.4.4" = rec {
      crateName = "h2";
-      version = "0.4.3";
+      version = "0.4.4";
      edition = "2021";
-      sha256 = "1m4rj76zl77jany6p10k4mm1cqwsrlc1dmgmxwp3jy7kwk92vvji";
+      sha256 = "0sc0ymhiqp4hbz39d405cjbga77wnz2pprbgyc498xs58hlwfvl1";
      authors = [
        "Carl Lerche <me@carllerche.com>"
        "Sean McArthur <sean@seanmonstar.com>"
@@ -4668,6 +4718,21 @@
      authors = [
        "Amanieu d'Antras <amanieu@gmail.com>"
      ];
+      dependencies = [
+        {
+          name = "ahash";
+          packageId = "ahash";
+          optional = true;
+          usesDefaultFeatures = false;
+        }
+        {
+          name = "allocator-api2";
+          packageId = "allocator-api2";
+          optional = true;
+          usesDefaultFeatures = false;
+          features = [ "alloc" ];
+        }
+      ];
      features = {
        "ahash" = [ "dep:ahash" ];
        "alloc" = [ "dep:alloc" ];
@@ -4682,7 +4747,7 @@
        "rustc-dep-of-std" = [ "nightly" "core" "compiler_builtins" "alloc" "rustc-internal-api" ];
        "serde" = [ "dep:serde" ];
      };
-      resolvedDefaultFeatures = [ "inline-more" "raw" ];
+      resolvedDefaultFeatures = [ "ahash" "allocator-api2" "default" "inline-more" "raw" ];
    };
    "heck" = rec {
      crateName = "heck";
@@ -4956,7 +5021,7 @@
        }
        {
          name = "h2";
-          packageId = "h2 0.3.24";
+          packageId = "h2 0.3.26";
          optional = true;
        }
        {
@@ -5034,11 +5099,11 @@
      };
      resolvedDefaultFeatures = [ "client" "default" "full" "h2" "http1" "http2" "runtime" "server" "socket2" "stream" "tcp" ];
    };
-    "hyper 1.2.0" = rec {
+    "hyper 1.3.1" = rec {
      crateName = "hyper";
-      version = "1.2.0";
+      version = "1.3.1";
      edition = "2021";
-      sha256 = "0fi6k7hz5fmdph0a5r8hw50d7h2n9zxkizmafcmb65f67bblhr8q";
+      sha256 = "0va9pjqshsr8zc07m9h4j2821hsmd9lw9j416yisjqh8gp8msmzy";
      authors = [
        "Sean McArthur <sean@seanmonstar.com>"
      ];
@@ -5060,7 +5125,7 @@
        }
        {
          name = "h2";
-          packageId = "h2 0.4.3";
+          packageId = "h2 0.4.4";
          optional = true;
        }
        {
@@ -5113,7 +5178,7 @@
          name = "futures-util";
          packageId = "futures-util";
          usesDefaultFeatures = false;
-          features = [ "sink" ];
+          features = [ "alloc" "sink" ];
        }
        {
          name = "tokio";
@@ -5123,7 +5188,7 @@
      ];
      features = {
        "client" = [ "dep:want" "dep:pin-project-lite" "dep:smallvec" ];
-        "ffi" = [ "dep:libc" "dep:http-body-util" ];
+        "ffi" = [ "dep:libc" "dep:http-body-util" "futures-util?/alloc" ];
        "full" = [ "client" "http1" "http2" "server" ];
        "http1" = [ "dep:futures-channel" "dep:futures-util" "dep:httparse" "dep:itoa" ];
        "http2" = [ "dep:futures-channel" "dep:futures-util" "dep:h2" ];
@@ -5155,7 +5220,7 @@
        }
        {
          name = "rustls";
-          packageId = "rustls 0.21.10";
+          packageId = "rustls 0.21.12";
          usesDefaultFeatures = false;
        }
        {
@@ -5181,7 +5246,7 @@
        }
        {
          name = "rustls";
-          packageId = "rustls 0.21.10";
+          packageId = "rustls 0.21.12";
          usesDefaultFeatures = false;
          features = [ "tls12" ];
        }
@@ -5276,7 +5341,7 @@
        }
        {
          name = "hyper";
-          packageId = "hyper 1.2.0";
+          packageId = "hyper 1.3.1";
        }
        {
          name = "pin-project-lite";
@@ -5302,7 +5367,7 @@
        }
        {
          name = "hyper";
-          packageId = "hyper 1.2.0";
+          packageId = "hyper 1.3.1";
          features = [ "full" ];
        }
        {
@@ -6155,6 +6220,28 @@
      };
      resolvedDefaultFeatures = [ "std" ];
    };
+    "lru" = rec {
+      crateName = "lru";
+      version = "0.12.3";
+      edition = "2015";
+      sha256 = "1p5hryc967wdh56q9wzb2x9gdqy3yd0sqmnb2fcf7z28wrsjw9nk";
+      authors = [
+        "Jerome Froelich <jeromefroelic@hotmail.com>"
+      ];
+      dependencies = [
+        {
+          name = "hashbrown";
+          packageId = "hashbrown 0.14.3";
+          optional = true;
+        }
+      ];
+      features = {
+        "default" = [ "hashbrown" ];
+        "hashbrown" = [ "dep:hashbrown" ];
+        "nightly" = [ "hashbrown" "hashbrown/nightly" ];
+      };
+      resolvedDefaultFeatures = [ "default" "hashbrown" ];
+    };
    "lzma-sys" = rec {
      crateName = "lzma-sys";
      version = "0.1.20";
@@ -6424,9 +6511,9 @@
    };
    "mio" = rec {
      crateName = "mio";
-      version = "0.8.10";
+      version = "0.8.11";
      edition = "2018";
-      sha256 = "02gyaxvaia9zzi4drrw59k9s0j6pa5d1y2kv7iplwjipdqlhngcg";
+      sha256 = "034byyl0ardml5yliy1hmvx8arkmn9rv479pid794sm07ia519m4";
      authors = [
        "Carl Lerche <me@carllerche.com>"
        "Thomas de Zeeuw <thomasdezeeuw@gmail.com>"
@@ -7025,7 +7112,7 @@
        }
        {
          name = "parking_lot";
-          packageId = "parking_lot 0.12.1";
+          packageId = "parking_lot 0.12.2";
        }
        {
          name = "percent-encoding";
@@ -7596,11 +7683,11 @@
      };
      resolvedDefaultFeatures = [ "default" ];
    };
-    "parking_lot 0.12.1" = rec {
+    "parking_lot 0.12.2" = rec {
      crateName = "parking_lot";
-      version = "0.12.1";
-      edition = "2018";
-      sha256 = "13r2xk7mnxfc5g0g6dkdxqdqad99j7s7z8zhzz4npw5r0g0v4hip";
+      version = "0.12.2";
+      edition = "2021";
+      sha256 = "1ys2dzz6cysjmwyivwxczl1ljpcf5cj4qmhdj07d5bkc9z5g0jky";
      authors = [
        "Amanieu d'Antras <amanieu@gmail.com>"
      ];
@@ -9265,7 +9352,7 @@
        }
        {
          name = "h2";
-          packageId = "h2 0.3.24";
+          packageId = "h2 0.3.26";
          target = { target, features }: (!("wasm32" == target."arch" or null));
        }
        {
@@ -9328,7 +9415,7 @@
        }
        {
          name = "rustls";
-          packageId = "rustls 0.21.10";
+          packageId = "rustls 0.21.12";
          optional = true;
          target = { target, features }: (!("wasm32" == target."arch" or null));
          features = [ "dangerous_configuration" ];
@@ -9890,11 +9977,11 @@
      };
      resolvedDefaultFeatures = [ "alloc" "default" "event" "fs" "net" "pipe" "process" "std" "termios" "time" "use-libc-auxv" ];
    };
-    "rustls 0.21.10" = rec {
+    "rustls 0.21.12" = rec {
      crateName = "rustls";
-      version = "0.21.10";
+      version = "0.21.12";
      edition = "2021";
-      sha256 = "1fmpzk3axnhkd99saqkvraifdfms4pkyi56lkihf8n877j0sdmgr";
+      sha256 = "0gjdg2a9r81sdwkyw3n5yfbkrr6p9gyk3xr2kcsr3cs83x6s2miz";
      dependencies = [
        {
          name = "log";
@@ -9931,11 +10018,11 @@
      };
      resolvedDefaultFeatures = [ "dangerous_configuration" "default" "log" "logging" "tls12" ];
    };
-    "rustls 0.22.2" = rec {
+    "rustls 0.22.4" = rec {
      crateName = "rustls";
-      version = "0.22.2";
+      version = "0.22.4";
      edition = "2021";
-      sha256 = "0hcxyhq6ynvws9v5b2h81s1nwmijmya7a3vyyyhsy1wqpmb9jz78";
+      sha256 = "0cl4q6w0x1cl5ldjsgbbiiqhkz6qg5vxl5dkn9wwsyxc44vzfkmz";
      dependencies = [
        {
          name = "log";
@@ -12116,7 +12203,7 @@
      dependencies = [
        {
          name = "rustls";
-          packageId = "rustls 0.21.10";
+          packageId = "rustls 0.21.12";
          usesDefaultFeatures = false;
        }
        {
@@ -12148,7 +12235,7 @@
      dependencies = [
        {
          name = "rustls";
-          packageId = "rustls 0.22.2";
+          packageId = "rustls 0.22.4";
          usesDefaultFeatures = false;
        }
        {
@@ -12527,7 +12614,7 @@
        }
        {
          name = "h2";
-          packageId = "h2 0.3.24";
+          packageId = "h2 0.3.26";
          optional = true;
        }
        {
@@ -12678,7 +12765,7 @@
        }
        {
          name = "h2";
-          packageId = "h2 0.3.24";
+          packageId = "h2 0.3.26";
          optional = true;
        }
        {
@@ -13610,7 +13697,7 @@
        }
        {
          name = "parking_lot";
-          packageId = "parking_lot 0.12.1";
+          packageId = "parking_lot 0.12.2";
        }
        {
          name = "petgraph";
@@ -13771,7 +13858,7 @@
        "tonic-reflection" = [ "dep:tonic-reflection" ];
        "virtiofs" = [ "fs" "dep:vhost" "dep:vhost-user-backend" "dep:virtio-queue" "dep:vm-memory" "dep:vmm-sys-util" "dep:virtio-bindings" "fuse-backend-rs?/vhost-user-fs" "fuse-backend-rs?/virtiofs" ];
      };
-      resolvedDefaultFeatures = [ "cloud" "default" "fs" "fuse" "tonic-reflection" "virtiofs" ];
+      resolvedDefaultFeatures = [ "cloud" "default" "fs" "fuse" "integration" "tonic-reflection" "virtiofs" ];
    };
    "tvix-cli" = rec {
      crateName = "tvix-cli";
@@ -14078,10 +14165,6 @@
          features = [ "tokio" "gzip" "bzip2" "xz" ];
        }
        {
-          name = "async-recursion";
-          packageId = "async-recursion";
-        }
-        {
          name = "bstr";
          packageId = "bstr";
        }
@@ -14275,8 +14358,9 @@
          packageId = "anyhow";
        }
        {
-          name = "async-recursion";
-          packageId = "async-recursion";
+          name = "async-compression";
+          packageId = "async-compression";
+          features = [ "tokio" "bzip2" "gzip" "xz" "zstd" ];
        }
        {
          name = "async-stream";
@@ -14322,6 +14406,10 @@
          packageId = "lazy_static";
        }
        {
+          name = "lru";
+          packageId = "lru";
+        }
+        {
          name = "nix-compat";
          packageId = "nix-compat";
          features = [ "async" ];
@@ -14343,6 +14431,10 @@
          features = [ "rt-tokio" ];
        }
        {
+          name = "parking_lot";
+          packageId = "parking_lot 0.12.2";
+        }
+        {
          name = "pin-project-lite";
          packageId = "pin-project-lite";
        }
@@ -14444,10 +14536,6 @@
          name = "walkdir";
          packageId = "walkdir";
        }
-        {
-          name = "xz2";
-          packageId = "xz2";
-        }
      ];
      buildDependencies = [
        {
@@ -14489,7 +14577,7 @@
        "tonic-reflection" = [ "dep:tonic-reflection" "tvix-castore/tonic-reflection" ];
        "virtiofs" = [ "tvix-castore/virtiofs" ];
      };
-      resolvedDefaultFeatures = [ "cloud" "default" "fuse" "otlp" "tonic-reflection" "virtiofs" ];
+      resolvedDefaultFeatures = [ "cloud" "default" "fuse" "integration" "otlp" "tonic-reflection" "virtiofs" ];
    };
    "typenum" = rec {
      crateName = "typenum";
@@ -16848,6 +16936,67 @@
      ];
 
    };
+    "zerocopy" = rec {
+      crateName = "zerocopy";
+      version = "0.7.34";
+      edition = "2018";
+      sha256 = "11xhrwixm78m6ca1jdxf584wdwvpgg7q00vg21fhwl0psvyf71xf";
+      authors = [
+        "Joshua Liebow-Feeser <joshlf@google.com>"
+      ];
+      dependencies = [
+        {
+          name = "zerocopy-derive";
+          packageId = "zerocopy-derive";
+          optional = true;
+        }
+        {
+          name = "zerocopy-derive";
+          packageId = "zerocopy-derive";
+          target = { target, features }: false;
+        }
+      ];
+      devDependencies = [
+        {
+          name = "zerocopy-derive";
+          packageId = "zerocopy-derive";
+        }
+      ];
+      features = {
+        "__internal_use_only_features_that_work_on_stable" = [ "alloc" "derive" "simd" ];
+        "byteorder" = [ "dep:byteorder" ];
+        "default" = [ "byteorder" ];
+        "derive" = [ "zerocopy-derive" ];
+        "simd-nightly" = [ "simd" ];
+        "zerocopy-derive" = [ "dep:zerocopy-derive" ];
+      };
+      resolvedDefaultFeatures = [ "simd" ];
+    };
+    "zerocopy-derive" = rec {
+      crateName = "zerocopy-derive";
+      version = "0.7.34";
+      edition = "2018";
+      sha256 = "0fqvglw01w3hp7xj9gdk1800x9j7v58s9w8ijiyiz2a7krb39s8m";
+      procMacro = true;
+      authors = [
+        "Joshua Liebow-Feeser <joshlf@google.com>"
+      ];
+      dependencies = [
+        {
+          name = "proc-macro2";
+          packageId = "proc-macro2";
+        }
+        {
+          name = "quote";
+          packageId = "quote";
+        }
+        {
+          name = "syn";
+          packageId = "syn 2.0.48";
+        }
+      ];
+
+    };
    "zeroize" = rec {
      crateName = "zeroize";
      version = "1.7.0";
@@ -17085,8 +17234,9 @@
   # because we compiled those test binaries in the former and not the latter.
   # So all paths will expect source tree to be there and not in the build top directly.
   # For example: $NIX_BUILD_TOP := /build in general, if you ask yourself.
-  # TODO(raitobezarius): I believe there could be more edge cases if `crate.sourceRoot`
-  # do exist but it's very hard to reason about them, so let's wait until the first bug report.
+  # NOTE: There could be edge cases if `crate.sourceRoot` does exist but
+  # it's very hard to reason about them.
+  # Open a bug if you run into this!
   mkdir -p source/
   cd source/
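Along with the crate2nix regeneration, `async-compression` gains its `zstd` feature here (and `xz2` is dropped from tvix-store further down). As a hedged sketch of what zstd decompression via the crate's tokio adapters looks like — the function name and wiring below are illustrative, not code from this commit:

```rust
use async_compression::tokio::bufread::ZstdDecoder;
use tokio::io::{AsyncReadExt, BufReader};

/// Decompress a zstd-compressed byte slice. Any `AsyncBufRead`
/// source can be wrapped in a `ZstdDecoder` the same way.
async fn decompress_zstd(compressed: &[u8]) -> std::io::Result<Vec<u8>> {
    let mut decoder = ZstdDecoder::new(BufReader::new(compressed));
    let mut out = Vec::new();
    decoder.read_to_end(&mut out).await?;
    Ok(out)
}
```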
features = [ "default" "cloud" ]; -} +}).overrideAttrs (_: { + meta.ci.targets = [ "integration-tests" ]; + passthru.integration-tests = depot.tvix.crates.workspaceMembers.tvix-castore.build.override { + runTests = true; + testPreRun = '' + export SSL_CERT_FILE=${pkgs.cacert}/etc/ssl/certs/ca-bundle.crt; + export PATH="$PATH:${pkgs.lib.makeBinPath [pkgs.cbtemulator pkgs.google-cloud-bigtable-tool]}" + ''; + + # enable some optional features. + features = [ "default" "cloud" "integration" ]; + }; +}) diff --git a/tvix/castore/src/blobservice/from_addr.rs b/tvix/castore/src/blobservice/from_addr.rs index 3e3f943e59..8898bbfb95 100644 --- a/tvix/castore/src/blobservice/from_addr.rs +++ b/tvix/castore/src/blobservice/from_addr.rs @@ -2,15 +2,12 @@ use url::Url; use crate::{proto::blob_service_client::BlobServiceClient, Error}; -use super::{ - BlobService, GRPCBlobService, MemoryBlobService, ObjectStoreBlobService, SledBlobService, -}; +use super::{BlobService, GRPCBlobService, MemoryBlobService, ObjectStoreBlobService}; /// Constructs a new instance of a [BlobService] from an URI. /// /// The following schemes are supported by the following services: /// - `memory://` ([MemoryBlobService]) -/// - `sled://` ([SledBlobService]) /// - `grpc+*://` ([GRPCBlobService]) /// - `objectstore+*://` ([ObjectStoreBlobService]) /// @@ -27,27 +24,6 @@ pub async fn from_addr(uri: &str) -> Result<Box<dyn BlobService>, crate::Error> } Box::<MemoryBlobService>::default() } - "sled" => { - // sled doesn't support host, and a path can be provided (otherwise - // it'll live in memory only). - if url.has_host() { - return Err(Error::StorageError("no host allowed".to_string())); - } - - if url.path() == "/" { - return Err(Error::StorageError( - "cowardly refusing to open / with sled".to_string(), - )); - } - - // TODO: expose other parameters as URL parameters? - - Box::new(if url.path().is_empty() { - SledBlobService::new_temporary().map_err(|e| Error::StorageError(e.to_string()))? - } else { - SledBlobService::new(url.path()).map_err(|e| Error::StorageError(e.to_string()))? - }) - } scheme if scheme.starts_with("grpc+") => { // schemes starting with grpc+ go to the GRPCPathInfoService. // That's normally grpc+unix for unix sockets, and grpc+http(s) for the HTTP counterparts. @@ -83,28 +59,11 @@ pub async fn from_addr(uri: &str) -> Result<Box<dyn BlobService>, crate::Error> #[cfg(test)] mod tests { use super::from_addr; - use lazy_static::lazy_static; use rstest::rstest; - use tempfile::TempDir; - - lazy_static! { - static ref TMPDIR_SLED_1: TempDir = TempDir::new().unwrap(); - static ref TMPDIR_SLED_2: TempDir = TempDir::new().unwrap(); - } #[rstest] /// This uses an unsupported scheme. #[case::unsupported_scheme("http://foo.example/test", false)] - /// This configures sled in temporary mode. - #[case::sled_temporary("sled://", true)] - /// This configures sled with /, which should fail. - #[case::sled_invalid_root("sled:///", false)] - /// This configures sled with a host, not path, which should fail. - #[case::sled_invalid_host("sled://foo.example", false)] - /// This configures sled with a valid path path, which should succeed. - #[case::sled_valid_path(&format!("sled://{}", &TMPDIR_SLED_1.path().to_str().unwrap()), true)] - /// This configures sled with a host, and a valid path path, which should fail. - #[case::sled_invalid_host_with_valid_path(&format!("sled://foo.example{}", &TMPDIR_SLED_2.path().to_str().unwrap()), false)] /// This correctly sets the scheme, and doesn't set a path. 
#[case::memory_valid("memory://", true)] /// This sets a memory url host to `foo` diff --git a/tvix/castore/src/blobservice/memory.rs b/tvix/castore/src/blobservice/memory.rs index 25eec334de..873d06b461 100644 --- a/tvix/castore/src/blobservice/memory.rs +++ b/tvix/castore/src/blobservice/memory.rs @@ -1,9 +1,7 @@ +use parking_lot::RwLock; use std::io::{self, Cursor, Write}; use std::task::Poll; -use std::{ - collections::HashMap, - sync::{Arc, RwLock}, -}; +use std::{collections::HashMap, sync::Arc}; use tonic::async_trait; use tracing::instrument; @@ -19,13 +17,13 @@ pub struct MemoryBlobService { impl BlobService for MemoryBlobService { #[instrument(skip_all, ret, err, fields(blob.digest=%digest))] async fn has(&self, digest: &B3Digest) -> io::Result<bool> { - let db = self.db.read().unwrap(); + let db = self.db.read(); Ok(db.contains_key(digest)) } #[instrument(skip_all, err, fields(blob.digest=%digest))] async fn open_read(&self, digest: &B3Digest) -> io::Result<Option<Box<dyn BlobReader>>> { - let db = self.db.read().unwrap(); + let db = self.db.read(); match db.get(digest).map(|x| Cursor::new(x.clone())) { Some(result) => Ok(Some(Box::new(result))), @@ -109,24 +107,16 @@ impl BlobWriter for MemoryBlobWriter { } else { let (buf, hasher) = self.writers.take().unwrap(); - // We know self.hasher is doing blake3 hashing, so this won't fail. let digest: B3Digest = hasher.finalize().as_bytes().into(); // Only insert if the blob doesn't already exist. - let db = self.db.read().map_err(|e| { - io::Error::new(io::ErrorKind::BrokenPipe, format!("RwLock poisoned: {}", e)) - })?; + let mut db = self.db.upgradable_read(); if !db.contains_key(&digest) { - // drop the read lock, so we can open for writing. - drop(db); - // open the database for writing. - let mut db = self.db.write().map_err(|e| { - io::Error::new(io::ErrorKind::BrokenPipe, format!("RwLock poisoned: {}", e)) - })?; - - // and put buf in there. This will move buf out. - db.insert(digest.clone(), buf); + db.with_upgraded(|db| { + // and put buf in there. This will move buf out. + db.insert(digest.clone(), buf); + }); } self.digest = Some(digest.clone()); diff --git a/tvix/castore/src/blobservice/mod.rs b/tvix/castore/src/blobservice/mod.rs index 4ba56a4af7..50acd40bf7 100644 --- a/tvix/castore/src/blobservice/mod.rs +++ b/tvix/castore/src/blobservice/mod.rs @@ -11,7 +11,6 @@ mod grpc; mod memory; mod naive_seeker; mod object_store; -mod sled; #[cfg(test)] pub mod tests; @@ -22,7 +21,6 @@ pub use self::from_addr::from_addr; pub use self::grpc::GRPCBlobService; pub use self::memory::MemoryBlobService; pub use self::object_store::ObjectStoreBlobService; -pub use self::sled::SledBlobService; /// The base trait all BlobService services need to implement. 
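The MemoryBlobService rewrite above swaps `std::sync::RwLock` (whose lock poisoning forced the `map_err`/`BrokenPipe` plumbing) for `parking_lot::RwLock`, and uses an upgradable read so the lock is only promoted to a write lock when the blob is actually missing — removing the old drop-the-read-lock-then-reacquire window. The same pattern in isolation, assuming parking_lot 0.12.2 as pinned above (types and function name are mine):

```rust
use std::collections::HashMap;

use parking_lot::RwLock;

// Insert only if the key is absent. `upgradable_read` allows concurrent
// plain readers, and `with_upgraded` promotes the guard to a write lock
// without releasing it first, so no other writer can slip in between
// the check and the insert.
fn insert_if_missing(db: &RwLock<HashMap<String, Vec<u8>>>, key: String, value: Vec<u8>) {
    let mut guard = db.upgradable_read();
    if !guard.contains_key(&key) {
        guard.with_upgraded(|db| {
            db.insert(key, value);
        });
    }
}
```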
diff --git a/tvix/castore/src/blobservice/mod.rs b/tvix/castore/src/blobservice/mod.rs
index 4ba56a4af7..50acd40bf7 100644
--- a/tvix/castore/src/blobservice/mod.rs
+++ b/tvix/castore/src/blobservice/mod.rs
@@ -11,7 +11,6 @@ mod grpc;
 mod memory;
 mod naive_seeker;
 mod object_store;
-mod sled;
 
 #[cfg(test)]
 pub mod tests;
@@ -22,7 +21,6 @@ pub use self::from_addr::from_addr;
 pub use self::grpc::GRPCBlobService;
 pub use self::memory::MemoryBlobService;
 pub use self::object_store::ObjectStoreBlobService;
-pub use self::sled::SledBlobService;
 
 /// The base trait all BlobService services need to implement.
 /// It provides functions to check whether a given blob exists,
diff --git a/tvix/castore/src/blobservice/sled.rs b/tvix/castore/src/blobservice/sled.rs
deleted file mode 100644
index 3dd4bff7bc..0000000000
--- a/tvix/castore/src/blobservice/sled.rs
+++ /dev/null
@@ -1,150 +0,0 @@
-use super::{BlobReader, BlobService, BlobWriter};
-use crate::{B3Digest, Error};
-use std::{
-    io::{self, Cursor, Write},
-    path::Path,
-    task::Poll,
-};
-use tonic::async_trait;
-use tracing::instrument;
-
-#[derive(Clone)]
-pub struct SledBlobService {
-    db: sled::Db,
-}
-
-impl SledBlobService {
-    pub fn new<P: AsRef<Path>>(p: P) -> Result<Self, sled::Error> {
-        let config = sled::Config::default()
-            .use_compression(false) // is a required parameter
-            .path(p);
-        let db = config.open()?;
-
-        Ok(Self { db })
-    }
-
-    pub fn new_temporary() -> Result<Self, sled::Error> {
-        let config = sled::Config::default().temporary(true);
-        let db = config.open()?;
-
-        Ok(Self { db })
-    }
-}
-
-#[async_trait]
-impl BlobService for SledBlobService {
-    #[instrument(skip(self), fields(blob.digest=%digest))]
-    async fn has(&self, digest: &B3Digest) -> io::Result<bool> {
-        match self.db.contains_key(digest.as_slice()) {
-            Ok(has) => Ok(has),
-            Err(e) => Err(io::Error::new(io::ErrorKind::Other, e.to_string())),
-        }
-    }
-
-    #[instrument(skip(self), fields(blob.digest=%digest))]
-    async fn open_read(&self, digest: &B3Digest) -> io::Result<Option<Box<dyn BlobReader>>> {
-        match self.db.get(digest.as_slice()) {
-            Ok(None) => Ok(None),
-            Ok(Some(data)) => Ok(Some(Box::new(Cursor::new(data[..].to_vec())))),
-            Err(e) => Err(io::Error::new(io::ErrorKind::Other, e.to_string())),
-        }
-    }
-
-    #[instrument(skip(self))]
-    async fn open_write(&self) -> Box<dyn BlobWriter> {
-        Box::new(SledBlobWriter::new(self.db.clone()))
-    }
-}
-
-pub struct SledBlobWriter {
-    db: sled::Db,
-
-    /// Contains the buffer Vec and hasher, or None if already closed
-    writers: Option<(Vec<u8>, blake3::Hasher)>,
-
-    /// The digest that has been returned, if we successfully closed.
-    digest: Option<B3Digest>,
-}
-
-impl SledBlobWriter {
-    pub fn new(db: sled::Db) -> Self {
-        Self {
-            db,
-            writers: Some((Vec::new(), blake3::Hasher::new())),
-            digest: None,
-        }
-    }
-}
-
-impl tokio::io::AsyncWrite for SledBlobWriter {
-    fn poll_write(
-        mut self: std::pin::Pin<&mut Self>,
-        _cx: &mut std::task::Context<'_>,
-        b: &[u8],
-    ) -> std::task::Poll<Result<usize, io::Error>> {
-        Poll::Ready(match &mut self.writers {
-            None => Err(io::Error::new(
-                io::ErrorKind::NotConnected,
-                "already closed",
-            )),
-            Some((ref mut buf, ref mut hasher)) => {
-                let bytes_written = buf.write(b)?;
-                hasher.write(&b[..bytes_written])
-            }
-        })
-    }
-
-    fn poll_flush(
-        mut self: std::pin::Pin<&mut Self>,
-        _cx: &mut std::task::Context<'_>,
-    ) -> std::task::Poll<Result<(), io::Error>> {
-        Poll::Ready(match &mut self.writers {
-            None => Err(io::Error::new(
-                io::ErrorKind::NotConnected,
-                "already closed",
-            )),
-            Some(_) => Ok(()),
-        })
-    }
-
-    fn poll_shutdown(
-        self: std::pin::Pin<&mut Self>,
-        _cx: &mut std::task::Context<'_>,
-    ) -> std::task::Poll<Result<(), io::Error>> {
-        // shutdown is "instantaneous", we only write to a Vec<u8> as buffer.
-        Poll::Ready(Ok(()))
-    }
-}
-
-#[async_trait]
-impl BlobWriter for SledBlobWriter {
-    async fn close(&mut self) -> io::Result<B3Digest> {
-        if self.writers.is_none() {
-            match &self.digest {
-                Some(digest) => Ok(digest.clone()),
-                None => Err(io::Error::new(
-                    io::ErrorKind::NotConnected,
-                    "already closed",
-                )),
-            }
-        } else {
-            let (buf, hasher) = self.writers.take().unwrap();
-
-            let digest: B3Digest = hasher.finalize().as_bytes().into();
-
-            // Only insert if the blob doesn't already exist.
-            if !self.db.contains_key(digest.as_slice()).map_err(|e| {
-                Error::StorageError(format!("Unable to check if we have blob {}: {}", digest, e))
-            })? {
-                // put buf in there. This will move buf out.
-                self.db
-                    .insert(digest.as_slice(), buf)
-                    .map_err(|e| Error::StorageError(format!("unable to insert blob: {}", e)))?;
-            }
-
-            self.digest = Some(digest.clone());
-
-            Ok(digest)
-        }
-    }
-}
diff --git a/tvix/castore/src/blobservice/tests/mod.rs b/tvix/castore/src/blobservice/tests/mod.rs
index 30c4e97634..0280faebb1 100644
--- a/tvix/castore/src/blobservice/tests/mod.rs
+++ b/tvix/castore/src/blobservice/tests/mod.rs
@@ -25,7 +25,6 @@ use self::utils::make_grpc_blob_service_client;
 #[case::grpc(make_grpc_blob_service_client().await)]
 #[case::memory(blobservice::from_addr("memory://").await.unwrap())]
 #[case::objectstore_memory(blobservice::from_addr("objectstore+memory://").await.unwrap())]
-#[case::sled(blobservice::from_addr("sled://").await.unwrap())]
 pub fn blob_services(#[case] blob_service: impl BlobService) {}
 
 /// Using [BlobService::has] on a non-existing blob should return false.
diff --git a/tvix/castore/src/directoryservice/bigtable.rs b/tvix/castore/src/directoryservice/bigtable.rs
index 0fdb24628f..1194c6ddc9 100644
--- a/tvix/castore/src/directoryservice/bigtable.rs
+++ b/tvix/castore/src/directoryservice/bigtable.rs
@@ -343,7 +343,7 @@ impl DirectoryService for BigtableDirectoryService {
     fn get_recursive(
         &self,
         root_directory_digest: &B3Digest,
-    ) -> BoxStream<Result<proto::Directory, Error>> {
+    ) -> BoxStream<'static, Result<proto::Directory, Error>> {
         traverse_directory(self.clone(), root_directory_digest)
     }
 
diff --git a/tvix/castore/src/directoryservice/from_addr.rs b/tvix/castore/src/directoryservice/from_addr.rs
index 31158d3a38..ae51df6376 100644
--- a/tvix/castore/src/directoryservice/from_addr.rs
+++ b/tvix/castore/src/directoryservice/from_addr.rs
@@ -144,7 +144,7 @@
     #[case::grpc_invalid_host_and_path("grpc+http://localhost/some-path", false)]
     /// A valid example for Bigtable
     #[cfg_attr(
-        feature = "cloud",
+        all(feature = "cloud", feature = "integration"),
         case::bigtable_valid_url(
             "bigtable://instance-1?project_id=project-1&table_name=table-1&family_name=cf1",
             true
@@ -152,7 +152,7 @@
         )
     )]
     /// A valid example for Bigtable, specifying a custom channel size and timeout
     #[cfg_attr(
-        feature = "cloud",
+        all(feature = "cloud", feature = "integration"),
         case::bigtable_valid_url(
             "bigtable://instance-1?project_id=project-1&table_name=table-1&family_name=cf1&channel_size=10&timeout=10",
             true
@@ -160,7 +160,7 @@
         )
     )]
     /// A invalid Bigtable example (missing fields)
     #[cfg_attr(
-        feature = "cloud",
+        all(feature = "cloud", feature = "integration"),
         case::bigtable_invalid_url("bigtable://instance-1", false)
     )]
     #[tokio::test]
diff --git a/tvix/castore/src/directoryservice/grpc.rs b/tvix/castore/src/directoryservice/grpc.rs
index 7402fe1b56..fe935629bf 100644
--- a/tvix/castore/src/directoryservice/grpc.rs
+++ b/tvix/castore/src/directoryservice/grpc.rs
@@ -107,7 +107,7 @@ impl DirectoryService for GRPCDirectoryService {
     fn get_recursive(
         &self,
         root_directory_digest: &B3Digest,
-    ) -> BoxStream<Result<proto::Directory, Error>> {
+    ) -> BoxStream<'static, Result<proto::Directory, Error>> {
         let mut grpc_client = self.grpc_client.clone();
         let root_directory_digest = root_directory_digest.clone();
 
diff --git a/tvix/castore/src/directoryservice/memory.rs b/tvix/castore/src/directoryservice/memory.rs
index 2cbbbd1b16..3b2795c396 100644
--- a/tvix/castore/src/directoryservice/memory.rs
+++ b/tvix/castore/src/directoryservice/memory.rs
@@ -1,7 +1,8 @@
 use crate::{proto, B3Digest, Error};
 use futures::stream::BoxStream;
 use std::collections::HashMap;
-use std::sync::{Arc, RwLock};
+use std::sync::Arc;
+use tokio::sync::RwLock;
 use tonic::async_trait;
 use tracing::{instrument, warn};
 
@@ -17,7 +18,7 @@ pub struct MemoryDirectoryService {
 impl DirectoryService for MemoryDirectoryService {
     #[instrument(skip(self, digest), fields(directory.digest = %digest))]
     async fn get(&self, digest: &B3Digest) -> Result<Option<proto::Directory>, Error> {
-        let db = self.db.read()?;
+        let db = self.db.read().await;
 
         match db.get(digest) {
             // The directory was not found, return
@@ -62,7 +63,7 @@ impl DirectoryService for MemoryDirectoryService {
         }
 
         // store it
-        let mut db = self.db.write()?;
+        let mut db = self.db.write().await;
         db.insert(digest.clone(), directory);
 
         Ok(digest)
@@ -72,7 +73,7 @@ impl DirectoryService for MemoryDirectoryService {
     fn get_recursive(
         &self,
         root_directory_digest: &B3Digest,
-    ) -> BoxStream<Result<proto::Directory, Error>> {
+    ) -> BoxStream<'static, Result<proto::Directory, Error>> {
         traverse_directory(self.clone(), root_directory_digest)
     }
 
diff --git a/tvix/castore/src/directoryservice/mod.rs b/tvix/castore/src/directoryservice/mod.rs
index cf6bea39d8..ca82ff2bc9 100644
--- a/tvix/castore/src/directoryservice/mod.rs
+++ b/tvix/castore/src/directoryservice/mod.rs
@@ -64,7 +64,7 @@ pub trait DirectoryService: Send + Sync {
     fn get_recursive(
         &self,
         root_directory_digest: &B3Digest,
-    ) -> BoxStream<Result<proto::Directory, Error>>;
+    ) -> BoxStream<'static, Result<proto::Directory, Error>>;
 
     /// Allows persisting a closure of [proto::Directory], which is a graph of
     /// connected Directory messages.
@@ -87,7 +87,7 @@ where
     fn get_recursive(
         &self,
         root_directory_digest: &B3Digest,
-    ) -> BoxStream<Result<proto::Directory, Error>> {
+    ) -> BoxStream<'static, Result<proto::Directory, Error>> {
         self.as_ref().get_recursive(root_directory_digest)
     }
 
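All the `get_recursive` implementations now return `BoxStream<'static, …>` instead of `BoxStream<…>`: with the elided lifetime the boxed stream was tied to `&self`, whereas `'static` requires the stream to own its data, so callers can keep it around or move it into a spawned task. A toy illustration of the distinction — the types here are mine, not tvix's:

```rust
use futures::stream::{self, BoxStream, StreamExt};

struct Service {
    items: Vec<u32>,
}

impl Service {
    // Cloning up front makes the returned stream self-contained
    // ('static): it no longer borrows from &self.
    fn get_all(&self) -> BoxStream<'static, u32> {
        stream::iter(self.items.clone()).boxed()
    }
}

#[tokio::main]
async fn main() {
    let svc = Service { items: vec![1, 2, 3] };
    let s = svc.get_all();
    drop(svc); // fine: the stream owns everything it needs
    assert_eq!(s.collect::<Vec<_>>().await, vec![1, 2, 3]);
}
```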
diff --git a/tvix/castore/src/directoryservice/sled.rs b/tvix/castore/src/directoryservice/sled.rs
index e4a4c2bbed..9490a49c00 100644
--- a/tvix/castore/src/directoryservice/sled.rs
+++ b/tvix/castore/src/directoryservice/sled.rs
@@ -37,12 +37,23 @@ impl SledDirectoryService {
 impl DirectoryService for SledDirectoryService {
     #[instrument(skip(self, digest), fields(directory.digest = %digest))]
     async fn get(&self, digest: &B3Digest) -> Result<Option<proto::Directory>, Error> {
-        match self.db.get(digest.as_slice()) {
+        let resp = tokio::task::spawn_blocking({
+            let db = self.db.clone();
+            let digest = digest.clone();
+            move || db.get(digest.as_slice())
+        })
+        .await?
+        .map_err(|e| {
+            warn!("failed to retrieve directory: {}", e);
+            Error::StorageError(format!("failed to retrieve directory: {}", e))
+        })?;
+
+        match resp {
             // The directory was not found, return
-            Ok(None) => Ok(None),
+            None => Ok(None),
 
             // The directory was found, try to parse the data as Directory message
-            Ok(Some(data)) => match Directory::decode(&*data) {
+            Some(data) => match Directory::decode(&*data) {
                 Ok(directory) => {
                     // Validate the retrieved Directory indeed has the
                     // digest we expect it to have, to detect corruptions.
@@ -70,35 +81,38 @@
                     Err(Error::StorageError(e.to_string()))
                 }
             },
-            // some storage error?
-            Err(e) => Err(Error::StorageError(e.to_string())),
         }
     }
 
     #[instrument(skip(self, directory), fields(directory.digest = %directory.digest()))]
     async fn put(&self, directory: proto::Directory) -> Result<B3Digest, Error> {
-        let digest = directory.digest();
-
-        // validate the directory itself.
-        if let Err(e) = directory.validate() {
-            return Err(Error::InvalidRequest(format!(
-                "directory {} failed validation: {}",
-                digest, e,
-            )));
-        }
-        // store it
-        let result = self.db.insert(digest.as_slice(), directory.encode_to_vec());
-        if let Err(e) = result {
-            return Err(Error::StorageError(e.to_string()));
-        }
-        Ok(digest)
+        tokio::task::spawn_blocking({
+            let db = self.db.clone();
+            move || {
+                let digest = directory.digest();
+
+                // validate the directory itself.
+                if let Err(e) = directory.validate() {
+                    return Err(Error::InvalidRequest(format!(
+                        "directory {} failed validation: {}",
+                        digest, e,
+                    )));
+                }
+                // store it
+                db.insert(digest.as_slice(), directory.encode_to_vec())
+                    .map_err(|e| Error::StorageError(e.to_string()))?;
+
+                Ok(digest)
+            }
+        })
+        .await?
     }
 
     #[instrument(skip_all, fields(directory.digest = %root_directory_digest))]
     fn get_recursive(
         &self,
         root_directory_digest: &B3Digest,
-    ) -> BoxStream<Result<proto::Directory, Error>> {
+    ) -> BoxStream<'static, Result<proto::Directory, Error>> {
         traverse_directory(self.clone(), root_directory_digest)
     }
 
@@ -143,25 +157,32 @@ impl DirectoryPutter for SledDirectoryPutter {
         match self.directory_validator.take() {
             None => Err(Error::InvalidRequest("already closed".to_string())),
             Some(validator) => {
-                // retrieve the validated directories.
-                let directories = validator.finalize()?;
-
-                // Get the root digest, which is at the end (cf. insertion order)
-                let root_digest = directories
-                    .last()
-                    .ok_or_else(|| Error::InvalidRequest("got no directories".to_string()))?
-                    .digest();
-
-                let mut batch = sled::Batch::default();
-                for directory in directories {
-                    batch.insert(directory.digest().as_slice(), directory.encode_to_vec());
-                }
-
-                self.tree
-                    .apply_batch(batch)
-                    .map_err(|e| Error::StorageError(format!("unable to apply batch: {}", e)))?;
-
-                Ok(root_digest)
+                // Insert all directories as a batch.
+                tokio::task::spawn_blocking({
+                    let tree = self.tree.clone();
+                    move || {
+                        // retrieve the validated directories.
+                        let directories = validator.finalize()?;
+
+                        // Get the root digest, which is at the end (cf. insertion order)
+                        let root_digest = directories
+                            .last()
+                            .ok_or_else(|| Error::InvalidRequest("got no directories".to_string()))?
+                            .digest();
+
+                        let mut batch = sled::Batch::default();
+                        for directory in directories {
+                            batch.insert(directory.digest().as_slice(), directory.encode_to_vec());
+                        }
+
+                        tree.apply_batch(batch).map_err(|e| {
+                            Error::StorageError(format!("unable to apply batch: {}", e))
+                        })?;
+
+                        Ok(root_digest)
+                    }
+                })
+                .await?
             }
         }
     }
+ .ok_or_else(|| { + // If we didn't get the directory node that's linked, that's a store inconsistency, bail out! + warn!("directory {} does not exist", digest); + + Error::StorageError(format!("directory {} does not exist", digest)) })?; - // fetch the linked node from the directory_service - match directory_service.as_ref().get(&digest).await? { - // If we didn't get the directory node that's linked, that's a store inconsistency, bail out! - None => { - warn!("directory {} does not exist", digest); - - return Err(Error::StorageError(format!( - "directory {} does not exist", - digest - ))); - } - Some(directory) => { - // look for first_component in the [Directory]. - // FUTUREWORK: as the nodes() iterator returns in a sorted fashion, we - // could stop as soon as e.name is larger than the search string. - let child_node = directory.nodes().find(|n| { - n.get_name() == first_component.as_os_str().as_bytes() - }); - - match child_node { - // child node not found means there's no such element inside the directory. - None => { - return Ok(None); - } - // child node found, return to top-of loop to find the next - // node in the path. - Some(child_node) => { - cur_node = child_node; - } - } - } - } - } + // look for the component in the [Directory]. + // FUTUREWORK: as the nodes() iterator returns in a sorted fashion, we + // could stop as soon as e.name is larger than the search string. + if let Some(child_node) = directory.nodes().find(|n| n.get_name() == component) { + // child node found, update parent_node to it and continue. + parent_node = child_node; + } else { + // child node not found means there's no such element inside the directory. + return Ok(None); } } } } + + // We traversed the entire path, so this must be the node. + Ok(Some(parent_node)) } #[cfg(test)] mod tests { - use std::path::PathBuf; - use crate::{ directoryservice, fixtures::{DIRECTORY_COMPLICATED, DIRECTORY_WITH_KEEP}, + PathBuf, }; use super::descend_to; @@ -132,7 +109,7 @@ mod tests { let resp = descend_to( &directory_service, node_directory_complicated.clone(), - &PathBuf::from(""), + "".parse::<PathBuf>().unwrap(), ) .await .expect("must succeed"); @@ -145,7 +122,7 @@ let resp = descend_to( &directory_service, node_directory_complicated.clone(), - &PathBuf::from("keep"), + "keep".parse::<PathBuf>().unwrap(), ) .await .expect("must succeed"); @@ -158,7 +135,7 @@ let resp = descend_to( &directory_service, node_directory_complicated.clone(), - &PathBuf::from("keep/.keep"), + "keep/.keep".parse::<PathBuf>().unwrap(), ) .await .expect("must succeed"); @@ -166,25 +143,12 @@ assert_eq!(Some(node_file_keep.clone()), resp); } - // traversal to `keep/.keep` should return the node for the .keep file - { - let resp = descend_to( - &directory_service, - node_directory_complicated.clone(), - &PathBuf::from("/keep/.keep"), - ) - .await - .expect("must succeed"); - - assert_eq!(Some(node_file_keep), resp); - } - // traversal to `void` should return None (doesn't exist) { let resp = descend_to( &directory_service, node_directory_complicated.clone(), - &PathBuf::from("void"), + "void".parse::<PathBuf>().unwrap(), ) .await .expect("must succeed"); @@ -192,12 +156,12 @@ assert_eq!(None, resp); } - // traversal to `void` should return None (doesn't exist) + // traversal to `v/oid` should return None (doesn't exist) { let resp = descend_to( &directory_service, node_directory_complicated.clone(), - &PathBuf::from("//v/oid"), + "v/oid".parse::<PathBuf>().unwrap(), ) .await .expect("must
succeed"); @@ -211,25 +175,12 @@ mod tests { let resp = descend_to( &directory_service, node_directory_complicated.clone(), - &PathBuf::from("keep/.keep/foo"), + "keep/.keep/foo".parse::<PathBuf>().unwrap(), ) .await .expect("must succeed"); assert_eq!(None, resp); } - - // traversal to a subpath of '/' should return the root node. - { - let resp = descend_to( - &directory_service, - node_directory_complicated.clone(), - &PathBuf::from("/"), - ) - .await - .expect("must succeed"); - - assert_eq!(Some(node_directory_complicated), resp); - } } } diff --git a/tvix/castore/src/directoryservice/utils.rs b/tvix/castore/src/directoryservice/utils.rs index 01c521076c..a0ba395ecd 100644 --- a/tvix/castore/src/directoryservice/utils.rs +++ b/tvix/castore/src/directoryservice/utils.rs @@ -2,14 +2,16 @@ use super::DirectoryService; use crate::proto; use crate::B3Digest; use crate::Error; -use async_stream::stream; +use async_stream::try_stream; use futures::stream::BoxStream; use std::collections::{HashSet, VecDeque}; +use tracing::instrument; use tracing::warn; /// Traverses a [proto::Directory] from the root to the children. /// /// This is mostly BFS, but directories are only returned once. +#[instrument(skip(directory_service))] pub fn traverse_directory<'a, DS: DirectoryService + 'static>( directory_service: DS, root_directory_digest: &B3Digest, @@ -23,60 +25,53 @@ pub fn traverse_directory<'a, DS: DirectoryService + 'static>( // We omit sending the same directories multiple times. let mut sent_directory_digests: HashSet<B3Digest> = HashSet::new(); - let stream = stream! { + Box::pin(try_stream! { while let Some(current_directory_digest) = worklist_directory_digests.pop_front() { - match directory_service.get(¤t_directory_digest).await { + let current_directory = directory_service.get(¤t_directory_digest).await.map_err(|e| { + warn!("failed to look up directory"); + Error::StorageError(format!( + "unable to look up directory {}: {}", + current_directory_digest, e + )) + })?.ok_or_else(|| { // if it's not there, we have an inconsistent store! - Ok(None) => { - warn!("directory {} does not exist", current_directory_digest); - yield Err(Error::StorageError(format!( - "directory {} does not exist", - current_directory_digest - ))); - } - Err(e) => { - warn!("failed to look up directory"); - yield Err(Error::StorageError(format!( - "unable to look up directory {}: {}", - current_directory_digest, e - ))); - } + warn!("directory {} does not exist", current_directory_digest); + Error::StorageError(format!( + "directory {} does not exist", + current_directory_digest + )) - // if we got it - Ok(Some(current_directory)) => { - // validate, we don't want to send invalid directories. - if let Err(e) = current_directory.validate() { - warn!("directory failed validation: {}", e.to_string()); - yield Err(Error::StorageError(format!( - "invalid directory: {}", - current_directory_digest - ))); - } + })?; - // We're about to send this directory, so let's avoid sending it again if a - // descendant has it. - sent_directory_digests.insert(current_directory_digest); + // validate, we don't want to send invalid directories. + current_directory.validate().map_err(|e| { + warn!("directory failed validation: {}", e.to_string()); + Error::StorageError(format!( + "invalid directory: {}", + current_directory_digest + )) + })?; - // enqueue all child directory digests to the work queue, as - // long as they're not part of the worklist or already sent. 
- // This panics if the digest looks invalid, it's supposed to be checked first. - for child_directory_node in &current_directory.directories { - // TODO: propagate error - let child_digest: B3Digest = child_directory_node.digest.clone().try_into().unwrap(); + // We're about to send this directory, so let's avoid sending it again if a + // descendant has it. + sent_directory_digests.insert(current_directory_digest); - if worklist_directory_digests.contains(&child_digest) - || sent_directory_digests.contains(&child_digest) - { - continue; - } - worklist_directory_digests.push_back(child_digest); - } + // enqueue all child directory digests to the work queue, as + // long as they're not part of the worklist or already sent. + // This panics if the digest looks invalid, it's supposed to be checked first. + for child_directory_node in &current_directory.directories { + // TODO: propagate error + let child_digest: B3Digest = child_directory_node.digest.clone().try_into().unwrap(); - yield Ok(current_directory); + if worklist_directory_digests.contains(&child_digest) + || sent_directory_digests.contains(&child_digest) + { + continue; } - }; - } - }; + worklist_directory_digests.push_back(child_digest); + } - Box::pin(stream) + yield current_directory; + } + }) } diff --git a/tvix/castore/src/errors.rs b/tvix/castore/src/errors.rs index e807a19b9e..8343d0774a 100644 --- a/tvix/castore/src/errors.rs +++ b/tvix/castore/src/errors.rs @@ -1,4 +1,3 @@ -use std::sync::PoisonError; use thiserror::Error; use tokio::task::JoinError; use tonic::Status; @@ -13,12 +12,6 @@ pub enum Error { StorageError(String), } -impl<T> From<PoisonError<T>> for Error { - fn from(value: PoisonError<T>) -> Self { - Error::StorageError(value.to_string()) - } -} - impl From<JoinError> for Error { fn from(value: JoinError) -> Self { Error::StorageError(value.to_string()) diff --git a/tvix/castore/src/fs/inodes.rs b/tvix/castore/src/fs/inodes.rs index c22bd4b2eb..bdd4595434 100644 --- a/tvix/castore/src/fs/inodes.rs +++ b/tvix/castore/src/fs/inodes.rs @@ -57,16 +57,18 @@ impl InodeData { children.len() as u64 } }, - mode: match self { - InodeData::Regular(_, _, false) => libc::S_IFREG | 0o444, // no-executable files - InodeData::Regular(_, _, true) => libc::S_IFREG | 0o555, // executable files - InodeData::Symlink(_) => libc::S_IFLNK | 0o444, - InodeData::Directory(_) => libc::S_IFDIR | 0o555, - }, + mode: self.as_fuse_type() | self.mode(), ..Default::default() } } + fn mode(&self) -> u32 { + match self { + InodeData::Regular(_, _, false) | InodeData::Symlink(_) => 0o444, + InodeData::Regular(_, _, true) | InodeData::Directory(_) => 0o555, + } + } + pub fn as_fuse_entry(&self, inode: u64) -> fuse_backend_rs::api::filesystem::Entry { fuse_backend_rs::api::filesystem::Entry { inode, diff --git a/tvix/castore/src/fs/virtiofs.rs b/tvix/castore/src/fs/virtiofs.rs index 846270d285..d63e2f2bdd 100644 --- a/tvix/castore/src/fs/virtiofs.rs +++ b/tvix/castore/src/fs/virtiofs.rs @@ -34,6 +34,7 @@ enum Error { /// Invalid descriptor chain. InvalidDescriptorChain, /// Failed to handle filesystem requests. + #[allow(dead_code)] HandleRequests(fuse_backend_rs::Error), /// Failed to construct new vhost user daemon. NewDaemon, diff --git a/tvix/castore/src/import/archive.rs b/tvix/castore/src/import/archive.rs index adcfb871d5..0ebb4a2361 100644 --- a/tvix/castore/src/import/archive.rs +++ b/tvix/castore/src/import/archive.rs @@ -1,6 +1,8 @@ +//! 
Imports from an archive (tarballs) + +use std::collections::HashMap; use std::io::{Cursor, Write}; use std::sync::Arc; -use std::{collections::HashMap, path::PathBuf}; use petgraph::graph::{DiGraph, NodeIndex}; use petgraph::visit::{DfsPostOrder, EdgeRef}; @@ -15,10 +17,12 @@ use tracing::{instrument, warn, Level}; use crate::blobservice::BlobService; use crate::directoryservice::DirectoryService; -use crate::import::{ingest_entries, Error as ImportError, IngestionEntry}; +use crate::import::{ingest_entries, IngestionEntry, IngestionError}; use crate::proto::node::Node; use crate::B3Digest; +type TarPathBuf = std::path::PathBuf; + /// Files smaller than this threshold, in bytes, are uploaded to the [BlobService] in the /// background. /// @@ -32,20 +36,42 @@ const MAX_TARBALL_BUFFER_SIZE: usize = 128 * 1024 * 1024; #[derive(Debug, thiserror::Error)] pub enum Error { - #[error("error reading archive entry: {0}")] - Io(#[from] std::io::Error), + #[error("unable to construct stream of entries: {0}")] + Entries(std::io::Error), + + #[error("unable to read next entry: {0}")] + NextEntry(std::io::Error), + + #[error("unable to read path for entry: {0}")] + PathRead(std::io::Error), + + #[error("unable to convert path {0} for entry: {1}")] + PathConvert(TarPathBuf, std::io::Error), + + #[error("unable to read size field for {0}: {1}")] + Size(TarPathBuf, std::io::Error), + + #[error("unable to read mode field for {0}: {1}")] + Mode(TarPathBuf, std::io::Error), + + #[error("unable to read link name field for {0}: {1}")] + LinkName(TarPathBuf, std::io::Error), + + #[error("unable to read blob contents for {0}: {1}")] + BlobRead(TarPathBuf, std::io::Error), + + // FUTUREWORK: proper error for blob finalize + #[error("unable to finalize blob {0}: {1}")] + BlobFinalize(TarPathBuf, std::io::Error), #[error("unsupported tar entry {0} type: {1:?}")] - UnsupportedTarEntry(PathBuf, tokio_tar::EntryType), + EntryType(TarPathBuf, tokio_tar::EntryType), #[error("symlink missing target {0}")] - MissingSymlinkTarget(PathBuf), + MissingSymlinkTarget(TarPathBuf), #[error("unexpected number of top level directory entries")] UnexpectedNumberOfTopLevelEntries, - - #[error("failed to import into castore {0}")] - Import(#[from] ImportError), } /// Ingests elements from the given tar [`Archive`] into the passed [`BlobService`] and @@ -55,10 +81,10 @@ pub async fn ingest_archive<BS, DS, R>( blob_service: BS, directory_service: DS, mut archive: Archive<R>, -) -> Result<Node, Error> +) -> Result<Node, IngestionError<Error>> where BS: BlobService + Clone + 'static, - DS: AsRef<dyn DirectoryService>, + DS: DirectoryService, R: AsyncRead + Unpin, { // Since tarballs can have entries in any arbitrary order, we need to @@ -71,16 +97,22 @@ where let semaphore = Arc::new(Semaphore::new(MAX_TARBALL_BUFFER_SIZE)); let mut async_blob_uploads: JoinSet<Result<(), Error>> = JoinSet::new(); - let mut entries_iter = archive.entries()?; - while let Some(mut entry) = entries_iter.try_next().await? { - let path: PathBuf = entry.path()?.into(); + let mut entries_iter = archive.entries().map_err(Error::Entries)?; + while let Some(mut entry) = entries_iter.try_next().await.map_err(Error::NextEntry)? { + let tar_path: TarPathBuf = entry.path().map_err(Error::PathRead)?.into(); + + // construct a castore PathBuf, which we use in the produced IngestionEntry.
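A rough sketch of what this conversion (the `from_host_path` call just below) does with a tar member path, against the unix-only API added in `tvix/castore/src/path.rs` further down in this diff; the exported name comes from the `lib.rs` hunk, and the concrete input paths are made up:

    use tvix_castore::PathBuf;

    fn main() -> std::io::Result<()> {
        // `.` components and repeated separators are normalized away.
        let p = PathBuf::from_host_path(std::path::Path::new("./etc//ssh"), false)?;
        assert_eq!(b"etc/ssh", p.as_bytes());

        // With canonicalize_dotdot = true (as used for tarballs here),
        // `..` is resolved lexically while the path is built up.
        let p = PathBuf::from_host_path(std::path::Path::new("a/b/../c"), true)?;
        assert_eq!(b"a/c", p.as_bytes());

        // Absolute paths are rejected outright.
        assert!(PathBuf::from_host_path(std::path::Path::new("/etc"), false).is_err());
        Ok(())
    }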
+ let path = crate::path::PathBuf::from_host_path(tar_path.as_path(), true) + .map_err(|e| Error::PathConvert(tar_path.clone(), e))?; let header = entry.header(); let entry = match header.entry_type() { tokio_tar::EntryType::Regular | tokio_tar::EntryType::GNUSparse | tokio_tar::EntryType::Continuous => { - let header_size = header.size()?; + let header_size = header + .size() + .map_err(|e| Error::Size(tar_path.clone(), e))?; // If the blob is small enough, read it off the wire, compute the digest, // and upload it to the [BlobService] in the background. @@ -101,7 +133,9 @@ where .acquire_many_owned(header_size as u32) .await .unwrap(); - let size = tokio::io::copy(&mut reader, &mut buffer).await?; + let size = tokio::io::copy(&mut reader, &mut buffer) + .await + .map_err(|e| Error::Size(tar_path.clone(), e))?; let digest: B3Digest = hasher.finalize().as_bytes().into(); @@ -109,12 +143,18 @@ where let blob_service = blob_service.clone(); let digest = digest.clone(); async_blob_uploads.spawn({ + let tar_path = tar_path.clone(); async move { let mut writer = blob_service.open_write().await; - tokio::io::copy(&mut Cursor::new(buffer), &mut writer).await?; + tokio::io::copy(&mut Cursor::new(buffer), &mut writer) + .await + .map_err(|e| Error::BlobRead(tar_path.clone(), e))?; - let blob_digest = writer.close().await?; + let blob_digest = writer + .close() + .await + .map_err(|e| Error::BlobFinalize(tar_path, e))?; assert_eq!(digest, blob_digest, "Tvix bug: blob digest mismatch"); @@ -130,35 +170,50 @@ where } else { let mut writer = blob_service.open_write().await; - let size = tokio::io::copy(&mut entry, &mut writer).await?; + let size = tokio::io::copy(&mut entry, &mut writer) + .await + .map_err(|e| Error::BlobRead(tar_path.clone(), e))?; - let digest = writer.close().await?; + let digest = writer + .close() + .await + .map_err(|e| Error::BlobFinalize(tar_path.clone(), e))?; (size, digest) }; + let executable = entry + .header() + .mode() + .map_err(|e| Error::Mode(tar_path, e))? + & 64 + != 0; + IngestionEntry::Regular { path, size, - executable: entry.header().mode()? & 64 != 0, + executable, digest, } } tokio_tar::EntryType::Symlink => IngestionEntry::Symlink { target: entry - .link_name()? - .ok_or_else(|| Error::MissingSymlinkTarget(path.clone()))? - .into(), + .link_name() + .map_err(|e| Error::LinkName(tar_path.clone(), e))? + .ok_or_else(|| Error::MissingSymlinkTarget(tar_path.clone()))? + .into_owned() + .into_os_string() + .into_encoded_bytes(), path, }, // Push a bogus directory marker so we can make sure this directory gets // created. We don't know the digest and size until after reading the full // tarball. - tokio_tar::EntryType::Directory => IngestionEntry::Dir { path: path.clone() }, + tokio_tar::EntryType::Directory => IngestionEntry::Dir { path }, tokio_tar::EntryType::XGlobalHeader | tokio_tar::EntryType::XHeader => continue, - entry_type => return Err(Error::UnsupportedTarEntry(path, entry_type)), + entry_type => return Err(Error::EntryType(tar_path, entry_type).into()), }; nodes.add(entry)?; @@ -193,7 +248,7 @@ where /// An error is returned if this is not the case and ingestion will fail. struct IngestionEntryGraph { graph: DiGraph<IngestionEntry, ()>, - path_to_index: HashMap<crate::path::PathBuf, NodeIndex>, root_node: Option<NodeIndex>, } @@ -218,7 +273,7 @@ impl IngestionEntryGraph { /// and new nodes are not directories, the node is replaced and is disconnected from its /// children.
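This graph machinery builds on the petgraph types imported at the top of the file. As a reminder of why a post-order DFS produces the order `ingest_entries` needs (children before parents, root last), a self-contained sketch with made-up node labels:

    use petgraph::graph::DiGraph;
    use petgraph::visit::DfsPostOrder;

    fn main() {
        // Tiny parent -> child graph: "a" -> "a/b" -> "a/b/c".
        let mut graph: DiGraph<&str, ()> = DiGraph::new();
        let a = graph.add_node("a");
        let ab = graph.add_node("a/b");
        let abc = graph.add_node("a/b/c");
        graph.add_edge(a, ab, ());
        graph.add_edge(ab, abc, ());

        // Post-order DFS from the root visits leaves first.
        let mut walker = DfsPostOrder::new(&graph, a);
        let mut order = Vec::new();
        while let Some(ix) = walker.next(&graph) {
            order.push(graph[ix]);
        }
        assert_eq!(vec!["a/b/c", "a/b", "a"], order);
    }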
pub fn add(&mut self, entry: IngestionEntry) -> Result<NodeIndex, Error> { - let path = entry.path().to_path_buf(); + let path = entry.path().to_owned(); let index = match self.path_to_index.get(entry.path()) { Some(&index) => { @@ -233,12 +288,12 @@ impl IngestionEntryGraph { None => self.graph.add_node(entry), }; - // A path with 1 component is the root node + // for archives, a path with 1 component is the root node if path.components().count() == 1 { // We expect archives to contain a single root node, if there is another root node // entry with a different path name, this is unsupported. if let Some(root_node) = self.root_node { - if self.get_node(root_node).path() != path { + if self.get_node(root_node).path() != path.as_ref() { return Err(Error::UnexpectedNumberOfTopLevelEntries); } } @@ -247,7 +302,7 @@ impl IngestionEntryGraph { } else if let Some(parent_path) = path.parent() { // Recursively add the parent node until it hits the root node. let parent_index = self.add(IngestionEntry::Dir { - path: parent_path.to_path_buf(), + path: parent_path.to_owned(), })?; // Insert an edge from the parent directory to the child entry. @@ -332,23 +387,29 @@ mod test { lazy_static! { pub static ref EMPTY_DIGEST: B3Digest = blake3::hash(&[]).as_bytes().into(); - pub static ref DIR_A: IngestionEntry = IngestionEntry::Dir { path: "a".into() }; - pub static ref DIR_B: IngestionEntry = IngestionEntry::Dir { path: "b".into() }; - pub static ref DIR_A_B: IngestionEntry = IngestionEntry::Dir { path: "a/b".into() }; + pub static ref DIR_A: IngestionEntry = IngestionEntry::Dir { + path: "a".parse().unwrap() + }; + pub static ref DIR_B: IngestionEntry = IngestionEntry::Dir { + path: "b".parse().unwrap() + }; + pub static ref DIR_A_B: IngestionEntry = IngestionEntry::Dir { + path: "a/b".parse().unwrap() + }; pub static ref FILE_A: IngestionEntry = IngestionEntry::Regular { - path: "a".into(), + path: "a".parse().unwrap(), size: 0, executable: false, digest: EMPTY_DIGEST.clone(), }; pub static ref FILE_A_B: IngestionEntry = IngestionEntry::Regular { - path: "a/b".into(), + path: "a/b".parse().unwrap(), size: 0, executable: false, digest: EMPTY_DIGEST.clone(), }; pub static ref FILE_A_B_C: IngestionEntry = IngestionEntry::Regular { - path: "a/b/c".into(), + path: "a/b/c".parse().unwrap(), size: 0, executable: false, digest: EMPTY_DIGEST.clone(), diff --git a/tvix/castore/src/import/error.rs b/tvix/castore/src/import/error.rs index 15dd0664de..e3fba617e0 100644 --- a/tvix/castore/src/import/error.rs +++ b/tvix/castore/src/import/error.rs @@ -1,39 +1,20 @@ -use std::{fs::FileType, path::PathBuf}; +use super::PathBuf; use crate::Error as CastoreError; +/// Represents all error types that are emitted by ingest_entries. +/// It can represent errors uploading individual Directories and finalizing +/// the upload. +/// It also contains a generic error kind that'll carry ingestion-method +/// specific errors.
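A small usage sketch of the producer-error plumbing described above, seen from the producer side; the function names here are made up, and the conversion comes from the `#[from]` attribute on the `Producer` variant in the enum below:

    use tvix_castore::import::IngestionError;

    // A producer failing with its own error type.
    fn next_entry() -> Result<(), std::io::Error> {
        Err(std::io::Error::new(std::io::ErrorKind::Other, "tar truncated"))
    }

    fn produce() -> Result<(), IngestionError<std::io::Error>> {
        // `?` converts the io::Error into IngestionError::Producer.
        next_entry()?;
        Ok(())
    }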
#[derive(Debug, thiserror::Error)] -pub enum Error { +pub enum IngestionError<E: std::fmt::Display> { + #[error("error from producer: {0}")] + Producer(#[from] E), + #[error("failed to upload directory at {0}: {1}")] UploadDirectoryError(PathBuf, CastoreError), - #[error("invalid encoding encountered for entry {0:?}")] - InvalidEncoding(PathBuf), - - #[error("unable to stat {0}: {1}")] - UnableToStat(PathBuf, std::io::Error), - - #[error("unable to open {0}: {1}")] - UnableToOpen(PathBuf, std::io::Error), - - #[error("unable to read {0}: {1}")] - UnableToRead(PathBuf, std::io::Error), - - #[error("unsupported file {0} type: {1:?}")] - UnsupportedFileType(PathBuf, FileType), -} - -impl From<CastoreError> for Error { - fn from(value: CastoreError) -> Self { - match value { - CastoreError::InvalidRequest(_) => panic!("tvix bug"), - CastoreError::StorageError(_) => panic!("error"), - } - } -} - -impl From<Error> for std::io::Error { - fn from(value: Error) -> Self { - std::io::Error::new(std::io::ErrorKind::Other, value) - } + #[error("failed to finalize directory upload: {0}")] + FinalizeDirectoryUpload(CastoreError), } diff --git a/tvix/castore/src/import/fs.rs b/tvix/castore/src/import/fs.rs index 6709d4a127..9d3ecfe6ab 100644 --- a/tvix/castore/src/import/fs.rs +++ b/tvix/castore/src/import/fs.rs @@ -1,8 +1,11 @@ +//! Import from a real filesystem. + use futures::stream::BoxStream; use futures::StreamExt; +use std::fs::FileType; +use std::os::unix::ffi::OsStringExt; use std::os::unix::fs::MetadataExt; use std::os::unix::fs::PermissionsExt; -use std::path::Path; use tracing::instrument; use walkdir::DirEntry; use walkdir::WalkDir; @@ -10,13 +13,11 @@ use walkdir::WalkDir; use crate::blobservice::BlobService; use crate::directoryservice::DirectoryService; use crate::proto::node::Node; +use crate::B3Digest; use super::ingest_entries; -use super::upload_blob_at_path; -use super::Error; use super::IngestionEntry; - -///! Imports that deal with a real filesystem. +use super::IngestionError; /// Ingests the contents at a given path into the tvix store, interacting with a [BlobService] and /// [DirectoryService]. It returns the root node or an error. 
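A usage sketch for this entry point, with the in-memory services used by the tests elsewhere in this diff; that `MemoryBlobService` offers the same `Default`/`Clone` derives as its directory counterpart is an assumption here, and the ingested path is a placeholder:

    use tvix_castore::blobservice::MemoryBlobService;
    use tvix_castore::directoryservice::MemoryDirectoryService;
    use tvix_castore::import::fs::ingest_path;

    #[tokio::main]
    async fn main() {
        let blob_service = MemoryBlobService::default();
        let directory_service = MemoryDirectoryService::default();

        // Walks the directory, uploads file contents as blobs and the
        // Directory closure, and returns the root castore node.
        let root_node = ingest_path(blob_service, directory_service, "/tmp/example")
            .await
            .expect("ingestion failed");

        println!("root node: {:?}", root_node);
    }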
@@ -30,11 +31,11 @@ pub async fn ingest_path<BS, DS, P>( blob_service: BS, directory_service: DS, path: P, -) -> Result<Node, Error> +) -> Result<Node, IngestionError<Error>> where - P: AsRef<Path> + std::fmt::Debug, + P: AsRef<std::path::Path> + std::fmt::Debug, BS: BlobService + Clone, - DS: AsRef<dyn DirectoryService>, + DS: DirectoryService, { let iter = WalkDir::new(path.as_ref()) .follow_links(false) @@ -55,13 +56,13 @@ where pub fn dir_entries_to_ingestion_stream<'a, BS, I>( blob_service: BS, iter: I, - root: &'a Path, + root: &'a std::path::Path, ) -> BoxStream<'a, Result<IngestionEntry, Error>> where BS: BlobService + Clone + 'a, I: Iterator<Item = Result<DirEntry, walkdir::Error>> + Send + 'a, { - let prefix = root.parent().unwrap_or_else(|| Path::new("")); + let prefix = root.parent().unwrap_or_else(|| std::path::Path::new("")); Box::pin( futures::stream::iter(iter) @@ -72,7 +73,7 @@ where Ok(dir_entry) => { dir_entry_to_ingestion_entry(blob_service, &dir_entry, prefix).await } - Err(e) => Err(Error::UnableToStat( + Err(e) => Err(Error::Stat( prefix.to_path_buf(), e.into_io_error().expect("walkdir err must be some"), )), @@ -91,32 +92,37 @@ where pub async fn dir_entry_to_ingestion_entry<BS>( blob_service: BS, entry: &DirEntry, - prefix: &Path, + prefix: &std::path::Path, ) -> Result<IngestionEntry, Error> where BS: BlobService, { let file_type = entry.file_type(); - let path = entry + let fs_path = entry .path() .strip_prefix(prefix) - .expect("Tvix bug: failed to strip root path prefix") - .to_path_buf(); + .expect("Tvix bug: failed to strip root path prefix"); + + // convert to castore PathBuf + let path = crate::path::PathBuf::from_host_path(fs_path, false) + .unwrap_or_else(|e| panic!("Tvix bug: walkdir direntry cannot be parsed: {}", e)); if file_type.is_dir() { Ok(IngestionEntry::Dir { path }) } else if file_type.is_symlink() { let target = std::fs::read_link(entry.path()) - .map_err(|e| Error::UnableToStat(entry.path().to_path_buf(), e))?; + .map_err(|e| Error::Stat(entry.path().to_path_buf(), e))? + .into_os_string() + .into_vec(); Ok(IngestionEntry::Symlink { path, target }) } else if file_type.is_file() { let metadata = entry .metadata() - .map_err(|e| Error::UnableToStat(entry.path().to_path_buf(), e.into()))?; + .map_err(|e| Error::Stat(entry.path().to_path_buf(), e.into()))?; - let digest = upload_blob_at_path(blob_service, entry.path().to_path_buf()).await?; + let digest = upload_blob(blob_service, entry.path().to_path_buf()).await?; Ok(IngestionEntry::Regular { path, @@ -127,6 +133,53 @@ where digest, }) } else { - Ok(IngestionEntry::Unknown { path, file_type }) + return Err(Error::FileType(fs_path.to_path_buf(), file_type)); } } + +/// Uploads the file at the provided [Path] to the [BlobService].
+#[instrument(skip(blob_service), fields(path), err)] +async fn upload_blob<BS>( + blob_service: BS, + path: impl AsRef<std::path::Path>, +) -> Result<B3Digest, Error> +where + BS: BlobService, +{ + let mut file = match tokio::fs::File::open(path.as_ref()).await { + Ok(file) => file, + Err(e) => return Err(Error::BlobRead(path.as_ref().to_path_buf(), e)), + }; + + let mut writer = blob_service.open_write().await; + + if let Err(e) = tokio::io::copy(&mut file, &mut writer).await { + return Err(Error::BlobRead(path.as_ref().to_path_buf(), e)); + }; + + let digest = writer + .close() + .await + .map_err(|e| Error::BlobFinalize(path.as_ref().to_path_buf(), e))?; + + Ok(digest) +} + +#[derive(Debug, thiserror::Error)] +pub enum Error { + #[error("unsupported file type at {0}: {1:?}")] + FileType(std::path::PathBuf, FileType), + + #[error("unable to stat {0}: {1}")] + Stat(std::path::PathBuf, std::io::Error), + + #[error("unable to open {0}: {1}")] + Open(std::path::PathBuf, std::io::Error), + + #[error("unable to read {0}: {1}")] + BlobRead(std::path::PathBuf, std::io::Error), + + // TODO: proper error for blob finalize + #[error("unable to finalize blob {0}: {1}")] + BlobFinalize(std::path::PathBuf, std::io::Error), +} diff --git a/tvix/castore/src/import/mod.rs b/tvix/castore/src/import/mod.rs index e9fdc750f8..e8b27e469c 100644 --- a/tvix/castore/src/import/mod.rs +++ b/tvix/castore/src/import/mod.rs @@ -4,9 +4,9 @@ //! Specific implementations, such as ingesting from the filesystem, live in //! child modules. -use crate::blobservice::BlobService; use crate::directoryservice::DirectoryPutter; use crate::directoryservice::DirectoryService; +use crate::path::{Path, PathBuf}; use crate::proto::node::Node; use crate::proto::Directory; use crate::proto::DirectoryNode; @@ -14,21 +14,14 @@ use crate::proto::FileNode; use crate::proto::SymlinkNode; use crate::B3Digest; use futures::{Stream, StreamExt}; -use std::fs::FileType; use tracing::Level; -#[cfg(target_family = "unix")] -use std::os::unix::ffi::OsStrExt; - -use std::{ - collections::HashMap, - path::{Path, PathBuf}, -}; +use std::collections::HashMap; use tracing::instrument; mod error; -pub use error::Error; +pub use error::IngestionError; pub mod archive; pub mod fs; @@ -51,10 +44,14 @@ pub mod fs; /// /// On success, returns the root node. #[instrument(skip_all, ret(level = Level::TRACE), err)] -pub async fn ingest_entries<DS, S>(directory_service: DS, mut entries: S) -> Result<Node, Error> +pub async fn ingest_entries<DS, S, E>( + directory_service: DS, + mut entries: S, +) -> Result<Node, IngestionError<E>> where - DS: AsRef<dyn DirectoryService>, - S: Stream<Item = Result<IngestionEntry, Error>> + Send + std::marker::Unpin, + DS: DirectoryService, + S: Stream<Item = Result<IngestionEntry, E>> + Send + std::marker::Unpin, + E: std::error::Error, { // For a given path, this holds the [Directory] structs as they are populated. let mut directories: HashMap<PathBuf, Directory> = HashMap::default(); @@ -68,20 +65,11 @@ where // we break the loop manually. .expect("Tvix bug: unexpected end of stream")?; - debug_assert!( - entry - .path() - .components() - .all(|x| matches!(x, std::path::Component::Normal(_))), - "path may only contain normal components" - ); - let name = entry .path() .file_name() // If this is the root node, it will have an empty name. .unwrap_or_default() - .as_bytes() .to_owned() .into(); @@ -89,7 +77,8 @@ where IngestionEntry::Dir { .. 
} => { // If the entry is a directory, we traversed all its children (and // populated it in `directories`). - // If we don't have it in there, it's an empty directory. + // If we don't have it in directories, it's a directory without + // children. let directory = directories .remove(entry.path()) // In that case, it contained no children @@ -102,9 +91,12 @@ where // If we don't have one yet (as that's the first one to upload), // initialize the putter. maybe_directory_putter - .get_or_insert_with(|| directory_service.as_ref().put_multiple_start()) + .get_or_insert_with(|| directory_service.put_multiple_start()) .put(directory) - .await?; + .await + .map_err(|e| { + IngestionError::UploadDirectoryError(entry.path().to_owned(), e) + })?; Node::Directory(DirectoryNode { name, @@ -114,7 +106,7 @@ where } IngestionEntry::Symlink { ref target, .. } => Node::Symlink(SymlinkNode { name, - target: target.as_os_str().as_bytes().to_owned().into(), + target: target.to_owned().into(), }), IngestionEntry::Regular { size, @@ -127,23 +119,27 @@ where size: *size, executable: *executable, }), - IngestionEntry::Unknown { path, file_type } => { - return Err(Error::UnsupportedFileType(path.clone(), *file_type)); - } }; - if entry.path().components().count() == 1 { + let parent = entry + .path() + .parent() + .expect("Tvix bug: got entry with root node"); + + if parent == crate::Path::ROOT { break node; + } else { + // record node in parent directory, creating a new [Directory] if not there yet. + directories.entry(parent.to_owned()).or_default().add(node); } - - // record node in parent directory, creating a new [Directory] if not there yet. - directories - .entry(entry.path().parent().unwrap().to_path_buf()) - .or_default() - .add(node); }; assert!( + entries.count().await == 0, + "Tvix bug: left over elements in the stream" + ); + + assert!( directories.is_empty(), "Tvix bug: left over directories after processing ingestion stream" ); @@ -152,7 +148,10 @@ where // they're all persisted to the backend. if let Some(mut directory_putter) = maybe_directory_putter { #[cfg_attr(not(debug_assertions), allow(unused))] - let root_directory_digest = directory_putter.close().await?; + let root_directory_digest = directory_putter + .close() + .await + .map_err(|e| IngestionError::FinalizeDirectoryUpload(e))?; #[cfg(debug_assertions)] { @@ -174,31 +173,6 @@ where Ok(root_node) } -/// Uploads the file at the provided [Path] the the [BlobService]. -#[instrument(skip(blob_service), fields(path), err)] -async fn upload_blob_at_path<BS>(blob_service: BS, path: PathBuf) -> Result<B3Digest, Error> -where - BS: BlobService, -{ - let mut file = match tokio::fs::File::open(&path).await { - Ok(file) => file, - Err(e) => return Err(Error::UnableToRead(path, e)), - }; - - let mut writer = blob_service.open_write().await; - - if let Err(e) = tokio::io::copy(&mut file, &mut writer).await { - return Err(Error::UnableToRead(path, e)); - }; - - let digest = writer - .close() - .await - .map_err(|e| Error::UnableToRead(path, e))?; - - Ok(digest) -} - #[derive(Debug, Clone, Eq, PartialEq)] pub enum IngestionEntry { Regular { @@ -209,15 +183,11 @@ pub enum IngestionEntry { }, Symlink { path: PathBuf, - target: PathBuf, + target: Vec<u8>, }, Dir { path: PathBuf, }, - Unknown { - path: PathBuf, - file_type: FileType, - }, } impl IngestionEntry { @@ -226,7 +196,6 @@ impl IngestionEntry { IngestionEntry::Regular { path, .. } => path, IngestionEntry::Symlink { path, .. 
} => path, IngestionEntry::Dir { path } => path, - IngestionEntry::Unknown { path, .. } => path, } } @@ -234,3 +203,138 @@ impl IngestionEntry { matches!(self, IngestionEntry::Dir { .. }) } } + +#[cfg(test)] +mod test { + use rstest::rstest; + + use crate::fixtures::{DIRECTORY_COMPLICATED, DIRECTORY_WITH_KEEP, EMPTY_BLOB_DIGEST}; + use crate::proto::node::Node; + use crate::proto::{Directory, DirectoryNode, FileNode, SymlinkNode}; + use crate::{directoryservice::MemoryDirectoryService, fixtures::DUMMY_DIGEST}; + + use super::ingest_entries; + use super::IngestionEntry; + + #[rstest] + #[case::single_file(vec![IngestionEntry::Regular { + path: "foo".parse().unwrap(), + size: 42, + executable: true, + digest: DUMMY_DIGEST.clone(), + }], + Node::File(FileNode { name: "foo".into(), digest: DUMMY_DIGEST.clone().into(), size: 42, executable: true } + ))] + #[case::single_symlink(vec![IngestionEntry::Symlink { + path: "foo".parse().unwrap(), + target: b"blub".into(), + }], + Node::Symlink(SymlinkNode { name: "foo".into(), target: "blub".into()}) + )] + #[case::single_dir(vec![IngestionEntry::Dir { + path: "foo".parse().unwrap(), + }], + Node::Directory(DirectoryNode { name: "foo".into(), digest: Directory::default().digest().into(), size: Directory::default().size()}) + )] + #[case::dir_with_keep(vec![ + IngestionEntry::Regular { + path: "foo/.keep".parse().unwrap(), + size: 0, + executable: false, + digest: EMPTY_BLOB_DIGEST.clone(), + }, + IngestionEntry::Dir { + path: "foo".parse().unwrap(), + }, + ], + Node::Directory(DirectoryNode { name: "foo".into(), digest: DIRECTORY_WITH_KEEP.digest().into(), size: DIRECTORY_WITH_KEEP.size() }) + )] + /// This is intentionally a bit unsorted, though it still satisfies all + /// requirements we have on the order of elements in the stream. 
+ #[case::directory_complicated(vec![ + IngestionEntry::Regular { + path: "blub/.keep".parse().unwrap(), + size: 0, + executable: false, + digest: EMPTY_BLOB_DIGEST.clone(), + }, + IngestionEntry::Regular { + path: "blub/keep/.keep".parse().unwrap(), + size: 0, + executable: false, + digest: EMPTY_BLOB_DIGEST.clone(), + }, + IngestionEntry::Dir { + path: "blub/keep".parse().unwrap(), + }, + IngestionEntry::Symlink { + path: "blub/aa".parse().unwrap(), + target: b"/nix/store/somewhereelse".into(), + }, + IngestionEntry::Dir { + path: "blub".parse().unwrap(), + }, + ], + Node::Directory(DirectoryNode { name: "blub".into(), digest: DIRECTORY_COMPLICATED.digest().into(), size:DIRECTORY_COMPLICATED.size() }) + )] + #[tokio::test] + async fn test_ingestion(#[case] entries: Vec<IngestionEntry>, #[case] exp_root_node: Node) { + let directory_service = MemoryDirectoryService::default(); + + let root_node = ingest_entries( + directory_service.clone(), + futures::stream::iter(entries.into_iter().map(Ok::<_, std::io::Error>)), + ) + .await + .expect("must succeed"); + + assert_eq!(exp_root_node, root_node, "root node should match"); + } + + #[rstest] + #[should_panic] + #[case::empty_entries(vec![])] + #[should_panic] + #[case::missing_intermediate_dir(vec![ + IngestionEntry::Regular { + path: "blub/.keep".parse().unwrap(), + size: 0, + executable: false, + digest: EMPTY_BLOB_DIGEST.clone(), + }, + ])] + #[should_panic] + #[case::leaf_after_parent(vec![ + IngestionEntry::Dir { + path: "blub".parse().unwrap(), + }, + IngestionEntry::Regular { + path: "blub/.keep".parse().unwrap(), + size: 0, + executable: false, + digest: EMPTY_BLOB_DIGEST.clone(), + }, + ])] + #[should_panic] + #[case::root_in_entry(vec![ + IngestionEntry::Regular { + path: ".keep".parse().unwrap(), + size: 0, + executable: false, + digest: EMPTY_BLOB_DIGEST.clone(), + }, + IngestionEntry::Dir { + path: "".parse().unwrap(), + }, + ])] + #[tokio::test] + async fn test_ingestion_fail(#[case] entries: Vec<IngestionEntry>) { + let directory_service = MemoryDirectoryService::default(); + + let _ = ingest_entries( + directory_service.clone(), + futures::stream::iter(entries.into_iter().map(Ok::<_, std::io::Error>)), + ) + .await; + } +} diff --git a/tvix/castore/src/lib.rs b/tvix/castore/src/lib.rs index 1a7ac6b4b4..bdc533a8c5 100644 --- a/tvix/castore/src/lib.rs +++ b/tvix/castore/src/lib.rs @@ -9,6 +9,9 @@ pub mod fixtures; #[cfg(feature = "fs")] pub mod fs; +mod path; +pub use path::{Path, PathBuf}; + pub mod import; pub mod proto; pub mod tonic; diff --git a/tvix/castore/src/path.rs b/tvix/castore/src/path.rs new file mode 100644 index 0000000000..fcc2bd01fb --- /dev/null +++ b/tvix/castore/src/path.rs @@ -0,0 +1,446 @@ +//! Contains data structures to deal with Paths in the tvix-castore model. + +use std::{ + borrow::Borrow, + fmt::{self, Debug, Display}, + mem, + ops::Deref, + str::FromStr, +}; + +use bstr::ByteSlice; + +use crate::proto::validate_node_name; + +/// Represents a Path in the castore model. +/// These are always relative, and platform-independent, which distinguishes +/// them from the ones provided in the standard library. +#[derive(Eq, Hash, PartialEq)] +#[repr(transparent)] // SAFETY: Representation has to match [u8] +pub struct Path { + // As node names in the castore model cannot contain slashes, + // we use them as component separators here. + inner: [u8], +} + +#[allow(dead_code)] +impl Path { + // SAFETY: The empty path is valid. 
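`Path` here follows the same unsized-newtype pattern as `std::path::Path` over `OsStr`: `#[repr(transparent)]` is what makes the reference transmutes in this block sound once the validity invariant has been checked, as the SAFETY comments note. A standalone sketch of that pattern, using a made-up `Ascii` type:

    use std::mem;

    #[repr(transparent)] // layout matches [u8]
    struct Ascii([u8]);

    impl Ascii {
        fn new(bytes: &[u8]) -> Option<&Ascii> {
            if bytes.is_ascii() {
                // SAFETY: &[u8] and &Ascii have the same representation,
                // and the invariant was just checked.
                Some(unsafe { mem::transmute(bytes) })
            } else {
                None
            }
        }
    }

    fn main() {
        assert!(Ascii::new(b"hello").is_some());
        assert!(Ascii::new(&[0xff_u8]).is_none());
    }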
+ pub const ROOT: &'static Path = unsafe { Path::from_bytes_unchecked(&[]) }; + + /// Convert a byte slice to a path, without checking validity. + const unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Path { + // SAFETY: &[u8] and &Path have the same representation. + unsafe { mem::transmute(bytes) } + } + + fn from_bytes(bytes: &[u8]) -> Option<&Path> { + if !bytes.is_empty() { + // Ensure all components are valid castore node names. + for component in bytes.split_str(b"/") { + validate_node_name(component).ok()?; + } + } + + // SAFETY: We have verified that the path contains no empty components. + Some(unsafe { Path::from_bytes_unchecked(bytes) }) + } + + pub fn into_boxed_bytes(self: Box<Path>) -> Box<[u8]> { + // SAFETY: Box<Path> and Box<[u8]> have the same representation. + unsafe { mem::transmute(self) } + } + + /// Returns the path without its final component, if there is one. + /// + /// Note that the parent of a bare file name is [Path::ROOT]. + /// [Path::ROOT] is the only path without a parent. + pub fn parent(&self) -> Option<&Path> { + // The root does not have a parent. + if self.inner.is_empty() { + return None; + } + + Some( + if let Some((parent, _file_name)) = self.inner.rsplit_once_str(b"/") { + // SAFETY: The parent of a valid Path is a valid Path. + unsafe { Path::from_bytes_unchecked(parent) } + } else { + // The parent of a bare file name is the root. + Path::ROOT + }, + ) + } + + /// Creates a PathBuf with `name` adjoined to self. + pub fn try_join(&self, name: &[u8]) -> Result<PathBuf, std::io::Error> { + let mut v = PathBuf::with_capacity(self.inner.len() + name.len() + 1); + v.inner.extend_from_slice(&self.inner); + v.try_push(name)?; + + Ok(v) + } + + /// Produces an iterator over the components of the path, which are + /// individual byte slices. + /// In case the path is empty, an empty iterator is returned. + pub fn components(&self) -> impl Iterator<Item = &[u8]> { + let mut iter = self.inner.split_str(&b"/"); + + // We don't want to return an empty element, consume it if it's the only one. + if self.inner.is_empty() { + let _ = iter.next(); + } + + iter + } + + /// Returns the final component of the Path, if there is one. + pub fn file_name(&self) -> Option<&[u8]> { + self.components().last() + } + + pub fn as_bytes(&self) -> &[u8] { + &self.inner + } +} + +impl Debug for Path { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + Debug::fmt(self.inner.as_bstr(), f) + } +} + +impl Display for Path { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + Display::fmt(self.inner.as_bstr(), f) + } +} + +impl AsRef<Path> for Path { + fn as_ref(&self) -> &Path { + self + } +} + +/// Represents an owned PathBuf in the castore model. +/// These are always relative, and platform-independent, which distinguishes +/// them from the ones provided in the standard library. +#[derive(Clone, Default, Eq, Hash, PartialEq)] +pub struct PathBuf { + inner: Vec<u8>, +} + +impl Deref for PathBuf { + type Target = Path; + + fn deref(&self) -> &Self::Target { + // SAFETY: PathBuf always contains a valid Path.
+ unsafe { Path::from_bytes_unchecked(&self.inner) } + } +} + +impl AsRef<Path> for PathBuf { + fn as_ref(&self) -> &Path { + self + } +} + +impl ToOwned for Path { + type Owned = PathBuf; + + fn to_owned(&self) -> Self::Owned { + PathBuf { + inner: self.inner.to_owned(), + } + } +} + +impl Borrow<Path> for PathBuf { + fn borrow(&self) -> &Path { + self + } +} + +impl From<Box<Path>> for PathBuf { + fn from(value: Box<Path>) -> Self { + // SAFETY: Box<Path> is always a valid path. + unsafe { PathBuf::from_bytes_unchecked(value.into_boxed_bytes().into_vec()) } + } +} + +impl From<&Path> for PathBuf { + fn from(value: &Path) -> Self { + value.to_owned() + } +} + +impl FromStr for PathBuf { + type Err = std::io::Error; + + fn from_str(s: &str) -> Result<PathBuf, Self::Err> { + Ok(Path::from_bytes(s.as_bytes()) + .ok_or(std::io::ErrorKind::InvalidData)? + .to_owned()) + } +} + +impl Debug for PathBuf { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + Debug::fmt(&**self, f) + } +} + +impl Display for PathBuf { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + Display::fmt(&**self, f) + } +} + +impl PathBuf { + pub fn new() -> PathBuf { + Self::default() + } + + pub fn with_capacity(capacity: usize) -> PathBuf { + // SAFETY: The empty path is a valid path. + Self { + inner: Vec::with_capacity(capacity), + } + } + + /// Adjoins `name` to self. + pub fn try_push(&mut self, name: &[u8]) -> Result<(), std::io::Error> { + validate_node_name(name).map_err(|_| std::io::ErrorKind::InvalidData)?; + + if !self.inner.is_empty() { + self.inner.push(b'/'); + } + + self.inner.extend_from_slice(name); + + Ok(()) + } + + /// Convert a byte vector to a PathBuf, without checking validity. + unsafe fn from_bytes_unchecked(bytes: Vec<u8>) -> PathBuf { + PathBuf { inner: bytes } + } + + /// Convert from a [&std::path::Path] to [Self]. + /// + /// - Self uses `/` as the path separator. + /// - Absolute paths are always rejected, as are those with custom prefixes. + /// - Repeated separators are deduplicated. + /// - Occurrences of `.` are normalized away. + /// - A trailing slash is normalized away. + /// + /// A `canonicalize_dotdot` boolean controls whether `..` will get + /// canonicalized if possible, or should return an error. + /// + /// For more exotic paths, this conversion might produce different results + /// on different platforms, due to different underlying byte + /// representations, which is why it's restricted to unix for now. + #[cfg(unix)] + pub fn from_host_path( + host_path: &std::path::Path, + canonicalize_dotdot: bool, + ) -> Result<Self, std::io::Error> { + let mut p = PathBuf::with_capacity(host_path.as_os_str().len()); + + for component in host_path.components() { + match component { + std::path::Component::Prefix(_) | std::path::Component::RootDir => { + return Err(std::io::Error::new( + std::io::ErrorKind::InvalidData, + "found disallowed prefix or rootdir", + )) + } + std::path::Component::CurDir => continue, // ignore + std::path::Component::ParentDir => { + if canonicalize_dotdot { + // Try popping the last element from the path being constructed. + // FUTUREWORK: pop method? + p = p + .parent() + .ok_or_else(|| { + std::io::Error::new( + std::io::ErrorKind::InvalidData, + "found .. going too far up", + ) + })? + .to_owned(); + } else { + return Err(std::io::Error::new( + std::io::ErrorKind::InvalidData, + "found disallowed ..", + )); + } + } + std::path::Component::Normal(s) => { + // append the new component to the path being constructed.
+ p.try_push(s.as_encoded_bytes()).map_err(|_| { + std::io::Error::new( + std::io::ErrorKind::InvalidData, + "encountered invalid node in sub_path component", + ) + })? + } + } + } + + Ok(p) + } + + pub fn into_boxed_path(self) -> Box<Path> { + // SAFETY: Box<[u8]> and Box<Path> have the same representation, + // and PathBuf always contains a valid Path. + unsafe { mem::transmute(self.inner.into_boxed_slice()) } + } + + pub fn into_bytes(self) -> Vec<u8> { + self.inner + } +} + +#[cfg(test)] +mod test { + use super::{Path, PathBuf}; + use bstr::ByteSlice; + use rstest::rstest; + + // TODO: add some manual tests including invalid UTF-8 (hard to express + // with rstest) + + #[rstest] + #[case::empty("", 0)] + #[case("a", 1)] + #[case("a/b", 2)] + #[case("a/b/c", 3)] + // add two slightly more cursed variants. + // Technically nothing prevents us from representing this with castore, + // but maybe we want to disallow constructing paths like this as it's a + // bad idea. + #[case::cursed("C:\\a/b", 2)] + #[case::cursed("\\\\tvix-store", 1)] + pub fn from_str(#[case] s: &str, #[case] num_components: usize) { + let p: PathBuf = s.parse().expect("must parse"); + + assert_eq!(s.as_bytes(), p.as_bytes(), "inner bytes mismatch"); + assert_eq!( + num_components, + p.components().count(), + "number of components mismatch" + ); + } + + #[rstest] + #[case::absolute("/a/b")] + #[case::two_forward_slashes_start("//a/b")] + #[case::two_forward_slashes_middle("a/b//c/d")] + #[case::trailing_slash("a/b/")] + #[case::dot(".")] + #[case::dotdot("..")] + #[case::dot_start("./a")] + #[case::dotdot_start("../a")] + #[case::dot_middle("a/./b")] + #[case::dotdot_middle("a/../b")] + #[case::dot_end("a/b/.")] + #[case::dotdot_end("a/b/..")] + #[case::null("fo\0o")] + pub fn from_str_fail(#[case] s: &str) { + s.parse::<PathBuf>().expect_err("must fail"); + } + + #[rstest] + #[case("foo", "")] + #[case("foo/bar", "foo")] + #[case("foo2/bar2", "foo2")] + #[case("foo/bar/baz", "foo/bar")] + pub fn parent(#[case] p: PathBuf, #[case] exp_parent: PathBuf) { + assert_eq!(Some(&*exp_parent), p.parent()); + } + + #[rstest] + pub fn no_parent() { + assert!(Path::ROOT.parent().is_none()); + } + + #[rstest] + #[case("a", "b", "a/b")] + #[case("a", "b", "a/b")] + pub fn join_push(#[case] mut p: PathBuf, #[case] name: &str, #[case] exp_p: PathBuf) { + assert_eq!(exp_p, p.try_join(name.as_bytes()).expect("join failed")); + p.try_push(name.as_bytes()).expect("push failed"); + assert_eq!(exp_p, p); + } + + #[rstest] + #[case("a", "/")] + #[case("a", "")] + #[case("a", "b/c")] + #[case("", "/")] + #[case("", "")] + #[case("", "b/c")] + #[case("", ".")] + #[case("", "..")] + pub fn join_push_fail(#[case] mut p: PathBuf, #[case] name: &str) { + p.try_join(name.as_bytes()) + .expect_err("join succeeded unexpectedly"); + p.try_push(name.as_bytes()) + .expect_err("push succeeded unexpectedly"); + } + + #[rstest] + #[case::empty("", vec![])] + #[case("a", vec!["a"])] + #[case("a/b", vec!["a", "b"])] + #[case("a/b/c", vec!["a","b", "c"])] + pub fn components(#[case] p: PathBuf, #[case] exp_components: Vec<&str>) { + assert_eq!( + exp_components, + p.components() + .map(|x| x.to_str().unwrap()) + .collect::<Vec<_>>() + ); + } + + #[rstest] + #[case::empty("", "", false)] + #[case::path("a", "a", false)] + #[case::path2("a/b", "a/b", false)] + #[case::double_slash_middle("a//b", "a/b", false)] + #[case::dot(".", "", false)] + #[case::dot_start("./a/b", "a/b", false)] + #[case::dot_middle("a/./b", "a/b", false)] + #[case::dot_end("a/b/.", 
"a/b", false)] + #[case::trailing_slash("a/b/", "a/b", false)] + #[case::dotdot_canonicalize("a/..", "", true)] + #[case::dotdot_canonicalize2("a/../b", "b", true)] + #[cfg_attr(unix, case::faux_prefix("\\\\nix-store", "\\\\nix-store", false))] + #[cfg_attr(unix, case::faux_letter("C:\\foo.txt", "C:\\foo.txt", false))] + pub fn from_host_path( + #[case] host_path: std::path::PathBuf, + #[case] exp_path: PathBuf, + #[case] canonicalize_dotdot: bool, + ) { + let p = PathBuf::from_host_path(&host_path, canonicalize_dotdot).expect("must succeed"); + + assert_eq!(exp_path, p); + } + + #[rstest] + #[case::absolute("/", false)] + #[case::dotdot_root("..", false)] + #[case::dotdot_root_canonicalize("..", true)] + #[case::dotdot_root_no_canonicalize("a/..", false)] + #[case::invalid_name("foo/bar\0", false)] + // #[cfg_attr(windows, case::prefix("\\\\nix-store", false))] + // #[cfg_attr(windows, case::letter("C:\\foo.txt", false))] + pub fn from_host_path_fail( + #[case] host_path: std::path::PathBuf, + #[case] canonicalize_dotdot: bool, + ) { + PathBuf::from_host_path(&host_path, canonicalize_dotdot).expect_err("must fail"); + } +} diff --git a/tvix/castore/src/proto/grpc_directoryservice_wrapper.rs b/tvix/castore/src/proto/grpc_directoryservice_wrapper.rs index 7d741a3f07..5c1428690c 100644 --- a/tvix/castore/src/proto/grpc_directoryservice_wrapper.rs +++ b/tvix/castore/src/proto/grpc_directoryservice_wrapper.rs @@ -1,12 +1,12 @@ use crate::directoryservice::ClosureValidator; use crate::proto; use crate::{directoryservice::DirectoryService, B3Digest}; -use futures::StreamExt; +use futures::stream::BoxStream; +use futures::TryStreamExt; use std::ops::Deref; -use tokio::sync::mpsc::channel; -use tokio_stream::wrappers::ReceiverStream; +use tokio_stream::once; use tonic::{async_trait, Request, Response, Status, Streaming}; -use tracing::{debug, instrument, warn}; +use tracing::{instrument, warn}; pub struct GRPCDirectoryServiceWrapper<T> { directory_service: T, @@ -23,63 +23,52 @@ impl<T> proto::directory_service_server::DirectoryService for GRPCDirectoryServi where T: Deref<Target = dyn DirectoryService> + Send + Sync + 'static, { - type GetStream = ReceiverStream<tonic::Result<proto::Directory, Status>>; + type GetStream = BoxStream<'static, tonic::Result<proto::Directory, Status>>; #[instrument(skip_all)] - async fn get( - &self, + async fn get<'a>( + &'a self, request: Request<proto::GetDirectoryRequest>, ) -> Result<Response<Self::GetStream>, Status> { - let (tx, rx) = channel(5); - let req_inner = request.into_inner(); - // look at the digest in the request and put it in the top of the queue. 
- match &req_inner.by_what { - None => return Err(Status::invalid_argument("by_what needs to be specified")), - Some(proto::get_directory_request::ByWhat::Digest(ref digest)) => { + let by_what = &req_inner + .by_what + .ok_or_else(|| Status::invalid_argument("invalid by_what"))?; + + match by_what { + proto::get_directory_request::ByWhat::Digest(ref digest) => { let digest: B3Digest = digest .clone() .try_into() .map_err(|_e| Status::invalid_argument("invalid digest length"))?; - if !req_inner.recursive { - let e: Result<proto::Directory, Status> = match self - .directory_service - .get(&digest) - .await - { - Ok(Some(directory)) => Ok(directory), - Ok(None) => { - Err(Status::not_found(format!("directory {} not found", digest))) - } - Err(e) => { - warn!(err = %e, directory.digest=%digest, "failed to get directory"); - Err(e.into()) - } - }; - - if tx.send(e).await.is_err() { - debug!("receiver dropped"); + Ok(tonic::Response::new({ + if !req_inner.recursive { + let directory = self + .directory_service + .get(&digest) + .await + .map_err(|e| { + warn!(err = %e, directory.digest=%digest, "failed to get directory"); + tonic::Status::new(tonic::Code::Internal, e.to_string()) + })? + .ok_or_else(|| { + Status::not_found(format!("directory {} not found", digest)) + })?; + + Box::pin(once(Ok(directory))) + } else { + // If recursive was requested, traverse via get_recursive. + Box::pin( + self.directory_service.get_recursive(&digest).map_err(|e| { + tonic::Status::new(tonic::Code::Internal, e.to_string()) + }), + ) } - } else { - // If recursive was requested, traverse via get_recursive. - let mut directories_it = self.directory_service.get_recursive(&digest); - - while let Some(e) = directories_it.next().await { - // map err in res from Error to Status - let res = e.map_err(|e| Status::internal(e.to_string())); - if tx.send(res).await.is_err() { - debug!("receiver dropped"); - break; - } - } - } + })) } } - - let receiver_stream = ReceiverStream::new(rx); - Ok(Response::new(receiver_stream)) } #[instrument(skip_all)] diff --git a/tvix/castore/src/proto/mod.rs b/tvix/castore/src/proto/mod.rs index 39c1bcc6fa..5374e3ae5a 100644 --- a/tvix/castore/src/proto/mod.rs +++ b/tvix/castore/src/proto/mod.rs @@ -66,7 +66,7 @@ pub enum ValidateStatBlobResponseError { /// Checks a Node name for validity as an intermediate node. /// We disallow slashes, null bytes, '.', '..' and the empty string. -fn validate_node_name(name: &[u8]) -> Result<(), ValidateNodeError> { +pub(crate) fn validate_node_name(name: &[u8]) -> Result<(), ValidateNodeError> { if name.is_empty() || name == b".." || name == b"." diff --git a/tvix/cli/src/main.rs b/tvix/cli/src/main.rs index 5635f446b9..d66d2ce4cb 100644 --- a/tvix/cli/src/main.rs +++ b/tvix/cli/src/main.rs @@ -80,27 +80,23 @@ struct Args { build_service_addr: String, } -/// Interprets the given code snippet, printing out warnings, errors -/// and the result itself. The return value indicates whether -/// evaluation succeeded. 
-fn interpret(code: &str, path: Option<PathBuf>, args: &Args, explain: bool) -> bool { - let tokio_runtime = tokio::runtime::Runtime::new().expect("failed to setup tokio runtime"); - - let (blob_service, directory_service, path_info_service) = tokio_runtime - .block_on({ - let blob_service_addr = args.blob_service_addr.clone(); - let directory_service_addr = args.directory_service_addr.clone(); - let path_info_service_addr = args.path_info_service_addr.clone(); - async move { - tvix_store::utils::construct_services( - blob_service_addr, - directory_service_addr, - path_info_service_addr, - ) - .await - } - }) - .expect("unable to setup {blob|directory|pathinfo}service before interpreter setup"); +fn init_io_handle(tokio_runtime: &tokio::runtime::Runtime, args: &Args) -> Rc<TvixStoreIO> { + let (blob_service, directory_service, path_info_service, nar_calculation_service) = + tokio_runtime + .block_on({ + let blob_service_addr = args.blob_service_addr.clone(); + let directory_service_addr = args.directory_service_addr.clone(); + let path_info_service_addr = args.path_info_service_addr.clone(); + async move { + tvix_store::utils::construct_services( + blob_service_addr, + directory_service_addr, + path_info_service_addr, + ) + .await + } + }) + .expect("unable to setup {blob|directory|pathinfo}service before interpreter setup"); let build_service = tokio_runtime .block_on({ @@ -117,14 +113,26 @@ fn interpret(code: &str, path: Option<PathBuf>, args: &Args, explain: bool) -> b }) .expect("unable to setup buildservice before interpreter setup"); - let tvix_store_io = Rc::new(TvixStoreIO::new( + Rc::new(TvixStoreIO::new( blob_service.clone(), directory_service.clone(), path_info_service.into(), + nar_calculation_service.into(), build_service.into(), tokio_runtime.handle().clone(), - )); + )) +} +/// Interprets the given code snippet, printing out warnings, errors +/// and the result itself. The return value indicates whether +/// evaluation succeeded. 
+fn interpret( + tvix_store_io: Rc<TvixStoreIO>, + code: &str, + path: Option<PathBuf>, + args: &Args, + explain: bool, +) -> bool { let mut eval = tvix_eval::Evaluation::new( Box::new(TvixIO::new(tvix_store_io.clone() as Rc<dyn EvalIO>)) as Box<dyn EvalIO>, true, @@ -242,18 +250,22 @@ fn main() { .try_init() .expect("unable to set up tracing subscriber"); + let tokio_runtime = tokio::runtime::Runtime::new().expect("failed to setup tokio runtime"); + + let io_handle = init_io_handle(&tokio_runtime, &args); + if let Some(file) = &args.script { - run_file(file.clone(), &args) + run_file(io_handle, file.clone(), &args) } else if let Some(expr) = &args.expr { - if !interpret(expr, None, &args, false) { + if !interpret(io_handle, expr, None, &args, false) { std::process::exit(1); } } else { - run_prompt(&args) + run_prompt(io_handle, &args) } } -fn run_file(mut path: PathBuf, args: &Args) { +fn run_file(io_handle: Rc<TvixStoreIO>, mut path: PathBuf, args: &Args) { if path.is_dir() { path.push("default.nix"); } @@ -262,7 +274,7 @@ fn run_file(mut path: PathBuf, args: &Args) { let success = if args.compile_only { lint(&contents, Some(path), args) } else { - interpret(&contents, Some(path), args, false) + interpret(io_handle, &contents, Some(path), args, false) }; if !success { @@ -286,7 +298,7 @@ fn state_dir() -> Option<PathBuf> { path } -fn run_prompt(args: &Args) { +fn run_prompt(io_handle: Rc<TvixStoreIO>, args: &Args) { let mut rl = Editor::<()>::new().expect("should be able to launch rustyline"); if args.compile_only { @@ -317,9 +329,9 @@ fn run_prompt(args: &Args) { rl.add_history_entry(&line); if let Some(without_prefix) = line.strip_prefix(":d ") { - interpret(without_prefix, None, args, true); + interpret(Rc::clone(&io_handle), without_prefix, None, args, true); } else { - interpret(&line, None, args, false); + interpret(Rc::clone(&io_handle), &line, None, args, false); } } Err(ReadlineError::Interrupted) | Err(ReadlineError::Eof) => break, diff --git a/tvix/crate-hashes.json b/tvix/crate-hashes.json index ca45e43176..2c1e740cb9 100644 --- a/tvix/crate-hashes.json +++ b/tvix/crate-hashes.json @@ -1,4 +1,4 @@ { - "bigtable_rs 0.2.9 (git+https://github.com/flokli/bigtable_rs?rev=0af404741dfc40eb9fa99cf4d4140a09c5c20df7#0af404741dfc40eb9fa99cf4d4140a09c5c20df7)": "1njjam1lx2xlnm7a41lga8601vmjgqz0fvc77x24gd04pc7avxll", - "wu-manber 0.1.0 (git+https://github.com/tvlfyi/wu-manber.git#0d5b22bea136659f7de60b102a7030e0daaa503d)": "1zhk83lbq99xzyjwphv2qrb8f8qgfqwa5bbbvyzm0z0bljsjv0pd" + "git+https://github.com/flokli/bigtable_rs?rev=0af404741dfc40eb9fa99cf4d4140a09c5c20df7#0.2.9": "1njjam1lx2xlnm7a41lga8601vmjgqz0fvc77x24gd04pc7avxll", + "git+https://github.com/tvlfyi/wu-manber.git#wu-manber@0.1.0": "1zhk83lbq99xzyjwphv2qrb8f8qgfqwa5bbbvyzm0z0bljsjv0pd" } \ No newline at end of file diff --git a/tvix/default.nix b/tvix/default.nix index f562cf37de..a3a4d35df6 100644 --- a/tvix/default.nix +++ b/tvix/default.nix @@ -224,9 +224,7 @@ in rustPlatform.cargoSetupHook ]; - # Allow blocks_in_conditions due to false positives with #[tracing::instrument(…)]: - # https://github.com/rust-lang/rust-clippy/issues/12281 - buildPhase = "cargo clippy --tests --all-features --benches --examples -- -Dwarnings -A clippy::blocks_in_conditions | tee $out"; + buildPhase = "cargo clippy --tests --all-features --benches --examples -- -Dwarnings | tee $out"; }; meta.ci.targets = [ diff --git a/tvix/docs/src/TODO.md b/tvix/docs/src/TODO.md index 6644bb6bac..8fb22ea822 100644 --- a/tvix/docs/src/TODO.md +++ 
b/tvix/docs/src/TODO.md
@@ -10,11 +10,27 @@
 Feel free to add new ideas. Before picking something, ask in `#tvix-dev` to make
 sure no one is working on this, or has some specific design in mind already.
 
 ## Cleanups
+### Nix language test suite
+ - Think about how to merge, but "categorize" `tvix_tests` in `glue` and `eval`.
+   We currently only have this split as they need a different feature set /
+   builtins.
  - move some of the rstest cases in `tvix-glue` to the `.nix`/`.exp` mechanism.
-   - Parts requiring test fixtures need some special convention.
-     Some of these also cannot be checked into the repo, like the import tests
-     adding special files to test filtering.
-   - add `nix_oracle` mechanism from `tvix-eval` to `tvix-glue`.
+   Some of them need test fixtures, which cannot be represented in git (special
+   file types in the import tests for example). Needs some support from the test
+   suite to create these fixtures on demand.
+ - extend `verify-lang-tests/default.nix` mechanism to validate `tvix-eval` and
+   `tvix-glue` test cases (or the common structure above).
+ - absorb `eval/tests/nix_oracle.rs` into `tvix_tests`, or figure out why it's
+   not possible (and document it). It looks like it's only because nix is invoked
+   with a different level of `--strict`, but the toplevel doc-comment suggests
+   it's generic?
+
+### Error cleanup
+ - Currently, all services use tvix_castore::Error, which only has two kinds
+   (invalid request, storage error), containing an (owned) string.
+   This is quite primitive. We should have individual error types for BS, DS, PS.
+   Maybe these should have some generics to still be able to carry errors from
+   the underlying backend, similar to `IngestionError`.
 
 ## Fixes towards correctness
 - `builtins.toXML` is missing string context. See b/398.
@@ -114,11 +130,8 @@ logs etc, but this is something requiring a lot of designing.
 - [redb](https://www.redb.org/) backend
 - sqlite backend (different schema than the Nix one, we need the root nodes data!)
 
-### Nix-compat
-- Async NAR reader (@edef?)
-
 ### Nix Daemon protocol
-- Some work ongoing on the worker operation parsing. Partially blocked on the async NAR reader.
+- Some work ongoing on the worker operation parsing (griff, picnoir)
 
 ### O11Y
 - gRPC trace propagation (cl/10532)
diff --git a/tvix/eval/docs/bindings.md b/tvix/eval/docs/bindings.md
new file mode 100644
index 0000000000..2b062cb13d
--- /dev/null
+++ b/tvix/eval/docs/bindings.md
@@ -0,0 +1,133 @@
+Compilation of bindings
+=======================
+
+Compilation of Nix bindings is one of the most mind-bending parts of Nix
+evaluation. The implementation of just the compilation is currently almost 1000
+lines of code, excluding the various insane test cases we dreamt up for it.
+
+## What is a binding?
+
+In short, any attribute set or `let`-expression. Tvix currently does not treat
+formals in function parameters (e.g. `{ name ? "fred" }: ...`) the same as these
+bindings.
+
+They have two very difficult features:
+
+1. Keys can mutually refer to each other in `rec` sets or `let`-bindings,
+   including out of definition order.
+2. Attribute sets can be nested, and parts of one attribute set can be defined
+   in multiple separate bindings.
+
+Tvix resolves as much of this logic statically (i.e. at compile-time) as
+possible, but the procedure is quite complicated.
+
+## High-level concept
+
+The idea behind the way we compile bindings is to fully resolve nesting
+statically, and use the usual mechanisms (i.e. recursion/thunking/value
+capturing) for resolving dynamic values.
+
+This is done by compiling bindings in several phases:
+
+1. An initial compilation phase *only* for plain inherit statements (i.e.
+   `inherit name;`), *not* for namespaced inherits (i.e. `inherit (from)
+   name;`).
+
+2. A declaration-only phase, in which we use the compiler's scope tracking logic
+   to calculate the physical runtime stack indices (further referred to as
+   "stack slots" or just "slots") that all values will end up in.
+
+   In this phase, whenever we encounter a nested attribute set, it is merged
+   into a custom data structure that acts like a synthetic AST node.
+
+   This can be imagined as similar to a rewrite like this:
+
+   ```nix
+   # initial code:
+   {
+     a.b = 1;
+     a.c = 2;
+   }
+
+   # rewritten form:
+   {
+     a = {
+       b = 1;
+       c = 2;
+     };
+   }
+   ```
+
+   The rewrite applies to attribute sets and `let`-bindings alike.
+
+   At the end of this phase, we know the stack slots of all namespaces for
+   inheriting from, all values inherited from them, and all values (and
+   optionally keys) of bindings at the current level.
+
+   Only statically known keys are actually merged, so any dynamic keys that
+   conflict will lead to a "key already defined" error at runtime.
+
+3. A compilation phase, in which all values (and, when necessary, keys) are
+   actually compiled. In this phase the custom data structure used for merging
+   is encountered when compiling values.
+
+   As this data structure acts like an AST node, the process begins recursively
+   for each nested attribute set.
+
+At the end of this process we have bytecode that leaves the required values (and
+optionally keys) on the stack. In the case of attribute sets, a final operation
+is emitted that constructs the actual attribute set structure at runtime. For
+`let`-bindings a final operation is emitted that removes these locals from the
+stack when the scope ends.
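To make phase 2 concrete, here is a toy model of the static merge performed there, outside the compiler: dotted binding paths are folded into one nested structure before any value is compiled. The types are illustrative only (the real implementation tracks stack slots via `TrackedBindings`, not a map):

```rust
use std::collections::BTreeMap;

#[derive(Debug)]
enum Binding {
    Leaf(&'static str),             // an expression to compile later
    Set(BTreeMap<String, Binding>), // a synthetic, merged attribute set
}

// Fold `a.b = ...; a.c = ...;` style entries into one nested structure,
// mirroring the rewrite shown in the document above.
fn declare(set: &mut BTreeMap<String, Binding>, path: &[&str], expr: &'static str) {
    match path {
        [] => unreachable!("empty attribute paths cannot be parsed"),
        [key] => {
            // (the real compiler reports "key already defined" on
            // duplicates; this toy just overwrites)
            set.insert((*key).to_string(), Binding::Leaf(expr));
        }
        [key, rest @ ..] => {
            let entry = set
                .entry((*key).to_string())
                .or_insert_with(|| Binding::Set(BTreeMap::new()));
            match entry {
                Binding::Set(inner) => declare(inner, rest, expr),
                // merging into a non-set (e.g. `a = 1; a.b = 2;`) is an
                // error, which Nix likewise rejects
                Binding::Leaf(_) => panic!("key already defined"),
            }
        }
    }
}

fn main() {
    let mut top = BTreeMap::new();
    declare(&mut top, &["a", "b"], "1");
    declare(&mut top, &["a", "c"], "2");
    // `top` is now { a: Set({ b: Leaf("1"), c: Leaf("2") }) }, i.e. the
    // rewritten form from the example above.
    println!("{top:?}");
}
```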
+## Moving parts
+
+WARNING: This documents the *current* implementation. If you only care about the
+conceptual aspects, see above.
+
+There are a few types involved:
+
+* `PeekableAttrs`: peekable iterator over an attribute path (e.g. `a.b.c`)
+* `BindingsKind`: enum defining the kind of bindings (attrs/recattrs/let)
+* `AttributeSet`: struct holding the bindings kind, the AST nodes with inherits
+  (both namespaced and not), and an internal representation of bindings
+  (essentially a vector of tuples of the peekable attrs and the expression to
+  compile for the value).
+* `Binding`: enum describing the kind of binding (namespaced inherit, attribute
+  set, plain binding of *any other value type*)
+* `KeySlot`: enum describing the location in which a key slot is placed at
+  runtime (nowhere, statically known value in a slot, dynamic value in a slot)
+* `TrackedBinding`: struct representing statically known information about a
+  single binding (its key slot, value slot and `Binding`)
+* `TrackedBindings`: vector of tracked bindings, which implements logic for
+  merging attribute sets together
+
+And quite a few methods on `Compiler`:
+
+* `compile_bindings`: entry point for compiling anything that looks like a
+  binding, this calls out to the functions below.
+* `compile_plain_inherits`: takes all inherits of a bindings node and compiles
+  the ones that are trivial to compile (i.e. just plain inherits without a
+  namespace). The `rnix` parser does not represent namespaced/plain inherits in
+  different nodes, so this function also aggregates the namespaced inherits and
+  returns them for further use.
+* `declare_namespaced_inherits`: passes over all namespaced inherits, declares
+  them on the locals stack, and inserts them into the provided
+  `TrackedBindings`.
+* `declare_bindings`: declares all regular key/value bindings in a bindings
+  scope, but without actually compiling their keys or values.
+
+  There's a lot of heavy lifting going on here:
+
+  1. It invokes the various pieces of logic responsible for merging nested
+     attribute sets together, creating intermediate data structures in the value
+     slots of bindings that can be recursively processed the same way.
+  2. It decides on the key slots of expressions based on the kind of bindings,
+     and the type of expression providing the key.
+* `bind_values`: runs the actual compilation of values. Notably, this function is
+  responsible for recursively compiling merged attribute sets when it encounters
+  a `Binding::Set` (on which it invokes `compile_bindings` itself).
+
+In addition to these, several methods (such as `compile_attr_set`,
+`compile_let_in`, ...) invoke the binding-kind-specific logic and then call out
+to the functions above.
diff --git a/tvix/eval/src/builtins/impure.rs b/tvix/eval/src/builtins/impure.rs
index 18403fe5d8..c82b910f5f 100644
--- a/tvix/eval/src/builtins/impure.rs
+++ b/tvix/eval/src/builtins/impure.rs
@@ -37,7 +37,7 @@ mod impure_builtins {
             Ok(p) => p,
         };
         let r = generators::request_open_file(&co, path).await;
-        Ok(hash_nix_string(algo.to_str()?, r).map(Value::from)?)
+        hash_nix_string(algo.to_str()?, r).map(Value::from)
     }
 
     #[builtin("pathExists")]
diff --git a/tvix/eval/src/vm/mod.rs b/tvix/eval/src/vm/mod.rs
index c10b79cd99..5c244cc3ca 100644
--- a/tvix/eval/src/vm/mod.rs
+++ b/tvix/eval/src/vm/mod.rs
@@ -1148,7 +1148,7 @@ where
                 let mut captured_with_stack = frame
                     .upvalues
                     .with_stack()
-                    .map(Clone::clone)
+                    .cloned()
                     // ... or make an empty one if there isn't one already.
.unwrap_or_else(|| Vec::with_capacity(self.with_stack.len())); diff --git a/tvix/eval/tests/nix_oracle.rs b/tvix/eval/tests/nix_oracle.rs index 6bab75cfd9..5a5cc0a822 100644 --- a/tvix/eval/tests/nix_oracle.rs +++ b/tvix/eval/tests/nix_oracle.rs @@ -30,7 +30,14 @@ fn nix_eval(expr: &str, strictness: Strictness) -> String { .arg(format!("({expr})")) .env( "NIX_REMOTE", - format!("local?root={}", store_dir.path().display()), + format!( + "local?root={}", + store_dir + .path() + .canonicalize() + .expect("valid path") + .display() + ), ) .output() .unwrap(); diff --git a/tvix/glue/Cargo.toml b/tvix/glue/Cargo.toml index f929d720a0..0afdefeaaa 100644 --- a/tvix/glue/Cargo.toml +++ b/tvix/glue/Cargo.toml @@ -4,7 +4,7 @@ version = "0.1.0" edition = "2021" [dependencies] -async-recursion = "1.0.5" +async-compression = { version = "0.4.9", features = ["tokio", "gzip", "bzip2", "xz"]} bstr = "1.6.0" bytes = "1.4.0" data-encoding = "2.3.3" @@ -30,10 +30,6 @@ md-5 = "0.10.6" url = "2.4.0" walkdir = "2.4.0" -[dependencies.async-compression] -version = "0.4.6" -features = ["tokio", "gzip", "bzip2", "xz"] - [dependencies.wu-manber] git = "https://github.com/tvlfyi/wu-manber.git" diff --git a/tvix/glue/benches/eval.rs b/tvix/glue/benches/eval.rs index dfb4fabe44..202278c1aa 100644 --- a/tvix/glue/benches/eval.rs +++ b/tvix/glue/benches/eval.rs @@ -2,10 +2,6 @@ use criterion::{black_box, criterion_group, criterion_main, Criterion}; use lazy_static::lazy_static; use std::{env, rc::Rc, sync::Arc, time::Duration}; use tvix_build::buildservice::DummyBuildService; -use tvix_castore::{ - blobservice::{BlobService, MemoryBlobService}, - directoryservice::{DirectoryService, MemoryDirectoryService}, -}; use tvix_eval::{builtins::impure_builtins, EvalIO}; use tvix_glue::{ builtins::{add_derivation_builtins, add_fetcher_builtins, add_import_builtins}, @@ -13,16 +9,9 @@ use tvix_glue::{ tvix_io::TvixIO, tvix_store_io::TvixStoreIO, }; -use tvix_store::pathinfoservice::{MemoryPathInfoService, PathInfoService}; +use tvix_store::utils::construct_services; lazy_static! { - static ref BLOB_SERVICE: Arc<dyn BlobService> = Arc::new(MemoryBlobService::default()); - static ref DIRECTORY_SERVICE: Arc<dyn DirectoryService> = - Arc::new(MemoryDirectoryService::default()); - static ref PATH_INFO_SERVICE: Arc<dyn PathInfoService> = Arc::new(MemoryPathInfoService::new( - BLOB_SERVICE.clone(), - DIRECTORY_SERVICE.clone(), - )); static ref TOKIO_RUNTIME: tokio::runtime::Runtime = tokio::runtime::Runtime::new().unwrap(); } @@ -30,12 +19,17 @@ fn interpret(code: &str) { // TODO: this is a bit annoying. // It'd be nice if we could set this up once and then run evaluate() with a // piece of code. b/262 + let (blob_service, directory_service, path_info_service, nar_calculation_service) = + TOKIO_RUNTIME + .block_on(async { construct_services("memory://", "memory://", "memory://").await }) + .unwrap(); // We assemble a complete store in memory. 
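One detail in the `nix_oracle` hunk above: the temporary store directory is canonicalized before being interpolated into `NIX_REMOTE`, resolving any symlinks in the temp dir path (on macOS, for example, `/tmp` is a symlink to `/private/tmp`). Schematically:

```rust
use std::path::Path;

// Build the NIX_REMOTE value the way the test above does: resolve
// symlinks first, then interpolate the canonical path.
fn nix_remote_for(store_dir: &Path) -> std::io::Result<String> {
    let canonical = store_dir.canonicalize()?;
    Ok(format!("local?root={}", canonical.display()))
}
```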
     let tvix_store_io = Rc::new(TvixStoreIO::new(
-        BLOB_SERVICE.clone(),
-        DIRECTORY_SERVICE.clone(),
-        PATH_INFO_SERVICE.clone(),
+        blob_service,
+        directory_service,
+        path_info_service.into(),
+        nar_calculation_service.into(),
         Arc::<DummyBuildService>::default(),
         TOKIO_RUNTIME.handle().clone(),
     ));
diff --git a/tvix/glue/src/builtins/derivation.rs b/tvix/glue/src/builtins/derivation.rs
index 8c7df96f91..a7742ae40a 100644
--- a/tvix/glue/src/builtins/derivation.rs
+++ b/tvix/glue/src/builtins/derivation.rs
@@ -457,55 +457,59 @@ pub(crate) mod derivation_builtins {
         drv.validate(false)
             .map_err(DerivationError::InvalidDerivation)?;
 
-        // Calculate the derivation_or_fod_hash for the current derivation.
-        // This one is still intermediate (so not added to known_paths)
-        let derivation_or_fod_hash_tmp = drv.derivation_or_fod_hash(|drv_path| {
-            known_paths
-                .get_hash_derivation_modulo(&drv_path.to_owned())
-                .unwrap_or_else(|| panic!("{} not found", drv_path))
-                .to_owned()
-        });
+        // Calculate the hash_derivation_modulo for the current derivation.
+        debug_assert!(
+            drv.outputs.values().all(|output| { output.path.is_none() }),
+            "outputs should still be unset"
+        );
 
         // Mutate the Derivation struct and set output paths
-        drv.calculate_output_paths(name, &derivation_or_fod_hash_tmp)
-            .map_err(DerivationError::InvalidDerivation)?;
+        drv.calculate_output_paths(
+            name,
+            // This one is still intermediate (so not added to known_paths),
+            // as the outputs are still unset.
+            &drv.hash_derivation_modulo(|drv_path| {
+                *known_paths
+                    .get_hash_derivation_modulo(&drv_path.to_owned())
+                    .unwrap_or_else(|| panic!("{} not found", drv_path))
+            }),
+        )
+        .map_err(DerivationError::InvalidDerivation)?;
 
         let drv_path = drv
             .calculate_derivation_path(name)
             .map_err(DerivationError::InvalidDerivation)?;
 
-        // TODO: avoid cloning
-        known_paths.add_derivation(drv_path.clone(), drv.clone());
-
-        let mut new_attrs: Vec<(String, NixString)> = drv
-            .outputs
-            .into_iter()
-            .map(|(name, output)| {
-                (
-                    name.clone(),
+        // Assemble the attrset to return from this builtin.
+        let out = Value::Attrs(Box::new(NixAttrs::from_iter(
+            drv.outputs
+                .iter()
+                .map(|(name, output)| {
+                    (
+                        name.clone(),
+                        NixString::new_context_from(
+                            NixContextElement::Single {
+                                name: name.clone(),
+                                derivation: drv_path.to_absolute_path(),
+                            }
+                            .into(),
+                            output.path.as_ref().unwrap().to_absolute_path(),
+                        ),
+                    )
+                })
+                .chain(std::iter::once((
+                    "drvPath".to_owned(),
                     NixString::new_context_from(
-                        NixContextElement::Single {
-                            name,
-                            derivation: drv_path.to_absolute_path(),
-                        }
-                        .into(),
-                        output.path.unwrap().to_absolute_path(),
+                        NixContextElement::Derivation(drv_path.to_absolute_path()).into(),
+                        drv_path.to_absolute_path(),
                     ),
-                )
-            })
-            .collect();
-
-        new_attrs.push((
-            "drvPath".to_string(),
-            NixString::new_context_from(
-                NixContextElement::Derivation(drv_path.to_absolute_path()).into(),
-                drv_path.to_absolute_path(),
-            ),
-        ));
-
-        Ok(Value::Attrs(Box::new(NixAttrs::from_iter(
-            new_attrs.into_iter(),
-        ))))
+                ))),
+        )));
+
+        // Register the Derivation in known_paths.
+ known_paths.add_derivation(drv_path, drv); + + Ok(out) } #[builtin("toFile")] diff --git a/tvix/glue/src/builtins/errors.rs b/tvix/glue/src/builtins/errors.rs index c05d366f13..f6d5745c56 100644 --- a/tvix/glue/src/builtins/errors.rs +++ b/tvix/glue/src/builtins/errors.rs @@ -6,6 +6,7 @@ use nix_compat::{ use reqwest::Url; use std::rc::Rc; use thiserror::Error; +use tvix_castore::import; /// Errors related to derivation construction #[derive(Debug, Error)] @@ -52,10 +53,7 @@ pub enum FetcherError { Io(#[from] std::io::Error), #[error(transparent)] - Import(#[from] tvix_castore::import::Error), - - #[error(transparent)] - ImportArchive(#[from] tvix_castore::import::archive::Error), + Import(#[from] tvix_castore::import::IngestionError<import::archive::Error>), #[error("Error calculating store path for fetcher output: {0}")] StorePath(#[from] BuildStorePathError), diff --git a/tvix/glue/src/builtins/import.rs b/tvix/glue/src/builtins/import.rs index 6814781df3..4a15afa814 100644 --- a/tvix/glue/src/builtins/import.rs +++ b/tvix/glue/src/builtins/import.rs @@ -95,9 +95,9 @@ async fn filtered_ingest( ); ingest_entries(&state.directory_service, entries) .await - .map_err(|err| ErrorKind::IO { + .map_err(|e| ErrorKind::IO { path: Some(path.to_path_buf()), - error: Rc::new(err.into()), + error: Rc::new(std::io::Error::new(std::io::ErrorKind::Other, e)), }) }) } @@ -178,7 +178,7 @@ mod import_builtins { CAHash::Nar(NixHash::Sha256(state.tokio_handle.block_on(async { Ok::<_, tvix_eval::ErrorKind>( state - .path_info_service + .nar_calculation_service .as_ref() .calculate_nar(&root_node) .await @@ -255,7 +255,7 @@ mod import_builtins { .tokio_handle .block_on(async { let (_, nar_sha256) = state - .path_info_service + .nar_calculation_service .as_ref() .calculate_nar(&root_node) .await?; diff --git a/tvix/glue/src/builtins/mod.rs b/tvix/glue/src/builtins/mod.rs index 4081489e0e..3d6263286d 100644 --- a/tvix/glue/src/builtins/mod.rs +++ b/tvix/glue/src/builtins/mod.rs @@ -68,7 +68,7 @@ mod tests { fn eval(str: &str) -> EvaluationResult { // We assemble a complete store in memory. let runtime = tokio::runtime::Runtime::new().expect("Failed to build a Tokio runtime"); - let (blob_service, directory_service, path_info_service) = runtime + let (blob_service, directory_service, path_info_service, nar_calculation_service) = runtime .block_on(async { construct_services("memory://", "memory://", "memory://").await }) .expect("Failed to construct store services in memory"); @@ -76,6 +76,7 @@ mod tests { blob_service, directory_service, path_info_service.into(), + nar_calculation_service.into(), Arc::<DummyBuildService>::default(), runtime.handle().clone(), )); @@ -739,6 +740,7 @@ mod tests { false )] fn builtins_filter_source_unsupported_files(#[case] code: &str, #[case] exp_success: bool) { + use nix::errno::Errno; use nix::sys::stat; use nix::unistd; use std::os::unix::net::UnixListener; @@ -765,6 +767,15 @@ mod tests { stat::Mode::S_IRWXU, 0, ) + .inspect_err(|e| { + if *e == Errno::EPERM { + eprintln!( + "\ +Missing permissions to create a character device node with mknod(2). +Please run this test as root or set CAP_MKNOD." 
+ ); + } + }) .expect("Failed to create a character device node"); let code_replaced = code.replace("@fixtures", &temp.path().to_string_lossy()); diff --git a/tvix/glue/src/decompression.rs b/tvix/glue/src/fetchers/decompression.rs index 11dc9d9835..f96fa60e34 100644 --- a/tvix/glue/src/decompression.rs +++ b/tvix/glue/src/fetchers/decompression.rs @@ -204,9 +204,9 @@ mod tests { } #[rstest] - #[case::gzip(include_bytes!("tests/blob.tar.gz"))] - #[case::bzip2(include_bytes!("tests/blob.tar.bz2"))] - #[case::xz(include_bytes!("tests/blob.tar.xz"))] + #[case::gzip(include_bytes!("../tests/blob.tar.gz"))] + #[case::bzip2(include_bytes!("../tests/blob.tar.bz2"))] + #[case::xz(include_bytes!("../tests/blob.tar.xz"))] #[tokio::test] async fn compressed_tar(#[case] data: &[u8]) { let reader = DecompressedReader::new(BufReader::new(data)); diff --git a/tvix/glue/src/fetchers.rs b/tvix/glue/src/fetchers/mod.rs index 7560c447d8..1b2e1ee20c 100644 --- a/tvix/glue/src/fetchers.rs +++ b/tvix/glue/src/fetchers/mod.rs @@ -14,10 +14,13 @@ use tvix_castore::{ directoryservice::DirectoryService, proto::{node::Node, FileNode}, }; -use tvix_store::{pathinfoservice::PathInfoService, proto::PathInfo}; +use tvix_store::{nar::NarCalculationService, pathinfoservice::PathInfoService, proto::PathInfo}; use url::Url; -use crate::{builtins::FetcherError, decompression::DecompressedReader}; +use crate::builtins::FetcherError; + +mod decompression; +use decompression::DecompressedReader; /// Representing options for doing a fetch. #[derive(Clone, Eq, PartialEq)] @@ -28,7 +31,8 @@ pub enum Fetch { URL(Url, Option<NixHash>), /// Fetch a tarball from the given URL and unpack. - /// The file must be a tape archive (.tar) compressed with gzip, bzip2 or xz. + /// The file must be a tape archive (.tar), optionally compressed with gzip, + /// bzip2 or xz. /// The top-level path component of the files in the tarball is removed, /// so it is best if the tarball contains a single directory at top level. /// Optionally, a sha256 digest can be provided to verify the unpacked @@ -56,10 +60,10 @@ fn redact_url(url: &Url) -> Url { impl std::fmt::Debug for Fetch { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - Fetch::URL(url, nixhash) => { + Fetch::URL(url, exp_hash) => { let url = redact_url(url); - if let Some(nixhash) = nixhash { - write!(f, "URL [url: {}, exp_hash: Some({})]", &url, nixhash) + if let Some(exp_hash) = exp_hash { + write!(f, "URL [url: {}, exp_hash: Some({})]", &url, exp_hash) } else { write!(f, "URL [url: {}, exp_hash: None]", &url) } @@ -102,20 +106,27 @@ impl Fetch { } /// Knows how to fetch a given [Fetch]. 
-pub struct Fetcher<BS, DS, PS> { +pub struct Fetcher<BS, DS, PS, NS> { http_client: reqwest::Client, blob_service: BS, directory_service: DS, path_info_service: PS, + nar_calculation_service: NS, } -impl<BS, DS, PS> Fetcher<BS, DS, PS> { - pub fn new(blob_service: BS, directory_service: DS, path_info_service: PS) -> Self { +impl<BS, DS, PS, NS> Fetcher<BS, DS, PS, NS> { + pub fn new( + blob_service: BS, + directory_service: DS, + path_info_service: PS, + nar_calculation_service: NS, + ) -> Self { Self { http_client: reqwest::Client::new(), blob_service, directory_service, path_info_service, + nar_calculation_service, } } @@ -166,11 +177,12 @@ async fn hash<D: Digest + std::io::Write>( Ok((hasher.finalize(), bytes_copied)) } -impl<BS, DS, PS> Fetcher<BS, DS, PS> +impl<BS, DS, PS, NS> Fetcher<BS, DS, PS, NS> where - BS: AsRef<(dyn BlobService + 'static)> + Clone + Send + Sync + 'static, - DS: AsRef<(dyn DirectoryService + 'static)>, + BS: BlobService + Clone + 'static, + DS: DirectoryService + Clone, PS: PathInfoService, + NS: NarCalculationService, { /// Ingest the data from a specified [Fetch]. /// On success, return the root node, a content digest and length. @@ -178,7 +190,7 @@ where /// didn't match the previously communicated hash contained inside the FetchArgs. pub async fn ingest(&self, fetch: Fetch) -> Result<(Node, CAHash, u64), FetcherError> { match fetch { - Fetch::URL(url, exp_nixhash) => { + Fetch::URL(url, exp_hash) => { // Construct a AsyncRead reading from the data as its downloaded. let mut r = self.download(url.clone()).await?; @@ -188,7 +200,7 @@ where // Copy the contents from the download reader to the blob writer. // Calculate the digest of the file received, depending on the // communicated expected hash (or sha256 if none provided). - let (actual_nixhash, blob_size) = match exp_nixhash + let (actual_hash, blob_size) = match exp_hash .as_ref() .map(NixHash::algo) .unwrap_or_else(|| HashAlgo::Sha256) @@ -209,12 +221,12 @@ where )?, }; - if let Some(exp_nixhash) = exp_nixhash { - if exp_nixhash != actual_nixhash { + if let Some(exp_hash) = exp_hash { + if exp_hash != actual_hash { return Err(FetcherError::HashMismatch { url, - wanted: exp_nixhash, - got: actual_nixhash, + wanted: exp_hash, + got: actual_hash, }); } } @@ -227,7 +239,7 @@ where size: blob_size, executable: false, }), - CAHash::Flat(actual_nixhash), + CAHash::Flat(actual_hash), blob_size, )) } @@ -243,7 +255,7 @@ where // Ingest the archive, get the root node let node = tvix_castore::import::archive::ingest_archive( self.blob_service.clone(), - &self.directory_service, + self.directory_service.clone(), archive, ) .await?; @@ -253,7 +265,7 @@ where // Even if no expected NAR sha256 has been provided, we need // the actual one later. let (nar_size, actual_nar_sha256) = self - .path_info_service + .nar_calculation_service .calculate_nar(&node) .await .map_err(|e| { @@ -305,7 +317,7 @@ where // the [PathInfo]. 
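Aside on the `Fetch::URL` arm above: the digest of the download is computed while the bytes are copied into the blob writer, and only afterwards compared against the expected hash. A minimal synchronous sketch of that hash-while-copying idea using `sha2` (the real code is async and selects the algorithm from the expected `NixHash`):

```rust
use sha2::{Digest, Sha256};
use std::io::{self, Read, Write};

fn copy_and_hash<R: Read, W: Write>(reader: &mut R, writer: &mut W) -> io::Result<(Vec<u8>, u64)> {
    let mut hasher = Sha256::new();
    let mut buf = [0u8; 8192];
    let mut total = 0u64;
    loop {
        let n = reader.read(&mut buf)?;
        if n == 0 {
            break;
        }
        hasher.update(&buf[..n]); // feed the digest as bytes stream through...
        writer.write_all(&buf[..n])?; // ...and forward them to the blob writer
        total += n as u64;
    }
    Ok((hasher.finalize().to_vec(), total))
}
```

On mismatch, the fetcher returns `FetcherError::HashMismatch` carrying both the wanted and the actual hash, as visible in the hunk above.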
         let (nar_size, nar_sha256) = match &ca_hash {
             CAHash::Flat(_nix_hash) => self
-                .path_info_service
+                .nar_calculation_service
                 .calculate_nar(&node)
                 .await
                 .map_err(|e| FetcherError::Io(e.into()))?,
@@ -379,12 +391,12 @@ mod tests {
     #[test]
     fn fetchurl_store_path() {
         let url = Url::parse("https://raw.githubusercontent.com/aaptel/notmuch-extract-patch/f732a53e12a7c91a06755ebfab2007adc9b3063b/notmuch-extract-patch").unwrap();
-        let exp_nixhash = NixHash::Sha256(
+        let exp_hash = NixHash::Sha256(
             nixbase32::decode_fixed("0nawkl04sj7psw6ikzay7kydj3dhd0fkwghcsf5rzaw4bmp4kbax")
                 .unwrap(),
         );
 
-        let fetch = Fetch::URL(url, Some(exp_nixhash));
+        let fetch = Fetch::URL(url, Some(exp_hash));
         assert_eq!(
             "06qi00hylriyfm0nl827crgjvbax84mz-notmuch-extract-patch",
             &fetch
diff --git a/tvix/glue/src/known_paths.rs b/tvix/glue/src/known_paths.rs
index c95065592b..290c9d5b69 100644
--- a/tvix/glue/src/known_paths.rs
+++ b/tvix/glue/src/known_paths.rs
@@ -73,7 +73,7 @@ impl KnownPaths {
         }
 
         // compute the hash derivation modulo
-        let hash_derivation_modulo = drv.derivation_or_fod_hash(|drv_path| {
+        let hash_derivation_modulo = drv.hash_derivation_modulo(|drv_path| {
             self.get_hash_derivation_modulo(&drv_path.to_owned())
                 .unwrap_or_else(|| panic!("{} not found", drv_path))
                 .to_owned()
diff --git a/tvix/glue/src/lib.rs b/tvix/glue/src/lib.rs
index 8528f09e52..2e5a3be103 100644
--- a/tvix/glue/src/lib.rs
+++ b/tvix/glue/src/lib.rs
@@ -6,7 +6,6 @@ pub mod tvix_build;
 pub mod tvix_io;
 pub mod tvix_store_io;
 
-mod decompression;
 #[cfg(test)]
 mod tests;
diff --git a/tvix/glue/src/tests/mod.rs b/tvix/glue/src/tests/mod.rs
index e66f484e3d..9fe0c22270 100644
--- a/tvix/glue/src/tests/mod.rs
+++ b/tvix/glue/src/tests/mod.rs
@@ -3,17 +3,15 @@ use std::{rc::Rc, sync::Arc};
 use pretty_assertions::assert_eq;
 use std::path::PathBuf;
 use tvix_build::buildservice::DummyBuildService;
-use tvix_castore::{
-    blobservice::{BlobService, MemoryBlobService},
-    directoryservice::{DirectoryService, MemoryDirectoryService},
-};
 use tvix_eval::{EvalIO, Value};
-use tvix_store::pathinfoservice::{MemoryPathInfoService, PathInfoService};
+use tvix_store::utils::construct_services;
 
 use rstest::rstest;
 
 use crate::{
     builtins::{add_derivation_builtins, add_fetcher_builtins, add_import_builtins},
+    configure_nix_path,
+    tvix_io::TvixIO,
     tvix_store_io::TvixStoreIO,
 };
 
@@ -34,28 +32,31 @@ fn eval_test(code_path: PathBuf, expect_success: bool) {
         return;
     }
 
-    let blob_service = Arc::new(MemoryBlobService::default()) as Arc<dyn BlobService>;
-    let directory_service =
-        Arc::new(MemoryDirectoryService::default()) as Arc<dyn DirectoryService>;
-    let path_info_service = Box::new(MemoryPathInfoService::new(
-        blob_service.clone(),
-        directory_service.clone(),
-    )) as Box<dyn PathInfoService>;
     let tokio_runtime = tokio::runtime::Runtime::new().unwrap();
+    let (blob_service, directory_service, path_info_service, nar_calculation_service) =
+        tokio_runtime
+            .block_on(async { construct_services("memory://", "memory://", "memory://").await })
+            .unwrap();
 
     let tvix_store_io = Rc::new(TvixStoreIO::new(
         blob_service,
         directory_service,
         path_info_service.into(),
+        nar_calculation_service.into(),
         Arc::new(DummyBuildService::default()),
         tokio_runtime.handle().clone(),
     ));
-    let mut eval = tvix_eval::Evaluation::new(tvix_store_io.clone() as Rc<dyn EvalIO>, true);
+    // Wrap with TvixIO, so <nix/fetchurl.nix> can be imported.
+ let mut eval = tvix_eval::Evaluation::new( + Box::new(TvixIO::new(tvix_store_io.clone() as Rc<dyn EvalIO>)) as Box<dyn EvalIO>, + true, + ); eval.strict = true; add_derivation_builtins(&mut eval, tvix_store_io.clone()); add_fetcher_builtins(&mut eval, tvix_store_io.clone()); add_import_builtins(&mut eval, tvix_store_io.clone()); + configure_nix_path(&mut eval, &None); let result = eval.evaluate(code, Some(code_path.clone())); let failed = match result.value { diff --git a/tvix/glue/src/tvix_store_io.rs b/tvix/glue/src/tvix_store_io.rs index 1f709906de..7b8ef3ff0a 100644 --- a/tvix/glue/src/tvix_store_io.rs +++ b/tvix/glue/src/tvix_store_io.rs @@ -1,6 +1,5 @@ //! This module provides an implementation of EvalIO talking to tvix-store. -use async_recursion::async_recursion; use bytes::Bytes; use futures::{StreamExt, TryStreamExt}; use nix_compat::nixhash::NixHash; @@ -19,6 +18,7 @@ use tracing::{error, info, instrument, warn, Level}; use tvix_build::buildservice::BuildService; use tvix_castore::proto::node::Node; use tvix_eval::{EvalIO, FileType, StdIO}; +use tvix_store::nar::NarCalculationService; use tvix_store::utils::AsyncIoBridge; use tvix_castore::{ @@ -53,13 +53,20 @@ pub struct TvixStoreIO { pub(crate) blob_service: Arc<dyn BlobService>, pub(crate) directory_service: Arc<dyn DirectoryService>, pub(crate) path_info_service: Arc<dyn PathInfoService>, + pub(crate) nar_calculation_service: Arc<dyn NarCalculationService>, + std_io: StdIO, #[allow(dead_code)] build_service: Arc<dyn BuildService>, pub(crate) tokio_handle: tokio::runtime::Handle, - pub(crate) fetcher: - Fetcher<Arc<dyn BlobService>, Arc<dyn DirectoryService>, Arc<dyn PathInfoService>>, + #[allow(clippy::type_complexity)] + pub(crate) fetcher: Fetcher< + Arc<dyn BlobService>, + Arc<dyn DirectoryService>, + Arc<dyn PathInfoService>, + Arc<dyn NarCalculationService>, + >, // Paths known how to produce, by building or fetching. pub(crate) known_paths: RefCell<KnownPaths>, @@ -70,6 +77,7 @@ impl TvixStoreIO { blob_service: Arc<dyn BlobService>, directory_service: Arc<dyn DirectoryService>, path_info_service: Arc<dyn PathInfoService>, + nar_calculation_service: Arc<dyn NarCalculationService>, build_service: Arc<dyn BuildService>, tokio_handle: tokio::runtime::Handle, ) -> Self { @@ -77,10 +85,16 @@ impl TvixStoreIO { blob_service: blob_service.clone(), directory_service: directory_service.clone(), path_info_service: path_info_service.clone(), + nar_calculation_service: nar_calculation_service.clone(), std_io: StdIO {}, build_service, tokio_handle, - fetcher: Fetcher::new(blob_service, directory_service, path_info_service), + fetcher: Fetcher::new( + blob_service, + directory_service, + path_info_service, + nar_calculation_service, + ), known_paths: Default::default(), } } @@ -92,7 +106,6 @@ impl TvixStoreIO { /// /// In case there is no PathInfo yet, this means we need to build it /// (which currently is stubbed out still). 
- #[async_recursion(?Send)] #[instrument(skip(self, store_path), fields(store_path=%store_path), ret(level = Level::TRACE), err)] async fn store_path_to_node( &self, @@ -249,8 +262,10 @@ impl TvixStoreIO { let root_node = output.node.as_ref().expect("invalid root node"); // calculate the nar representation - let (nar_size, nar_sha256) = - self.path_info_service.calculate_nar(root_node).await?; + let (nar_size, nar_sha256) = self + .nar_calculation_service + .calculate_nar(root_node) + .await?; // assemble the PathInfo to persist let path_info = PathInfo { @@ -305,6 +320,9 @@ impl TvixStoreIO { }; // now with the root_node and sub_path, descend to the node requested. + // We convert sub_path to the castore model here. + let sub_path = tvix_castore::PathBuf::from_host_path(sub_path, true)?; + directoryservice::descend_to(&self.directory_service, root_node, sub_path) .await .map_err(|e| std::io::Error::new(io::ErrorKind::Other, e)) @@ -322,7 +340,7 @@ impl TvixStoreIO { // because the path info construct a narinfo which *always* // require a SHA256 of the NAR representation and the NAR size. let (nar_size, nar_sha256) = self - .path_info_service + .nar_calculation_service .as_ref() .calculate_nar(&root_node) .await?; @@ -563,6 +581,7 @@ impl EvalIO for TvixStoreIO { &self.blob_service, &self.directory_service, &self.path_info_service, + &self.nar_calculation_service, ) .await })?; @@ -583,12 +602,8 @@ mod tests { use bstr::ByteSlice; use tempfile::TempDir; use tvix_build::buildservice::DummyBuildService; - use tvix_castore::{ - blobservice::{BlobService, MemoryBlobService}, - directoryservice::{DirectoryService, MemoryDirectoryService}, - }; use tvix_eval::{EvalIO, EvaluationResult}; - use tvix_store::pathinfoservice::MemoryPathInfoService; + use tvix_store::utils::construct_services; use super::TvixStoreIO; use crate::builtins::{add_derivation_builtins, add_fetcher_builtins, add_import_builtins}; @@ -597,22 +612,19 @@ mod tests { /// Takes care of setting up the evaluator so it knows about the // `derivation` builtin. fn eval(str: &str) -> EvaluationResult { - let blob_service = Arc::new(MemoryBlobService::default()) as Arc<dyn BlobService>; - let directory_service = - Arc::new(MemoryDirectoryService::default()) as Arc<dyn DirectoryService>; - let path_info_service = Arc::new(MemoryPathInfoService::new( - blob_service.clone(), - directory_service.clone(), - )); - - let runtime = tokio::runtime::Runtime::new().unwrap(); + let tokio_runtime = tokio::runtime::Runtime::new().unwrap(); + let (blob_service, directory_service, path_info_service, nar_calculation_service) = + tokio_runtime + .block_on(async { construct_services("memory://", "memory://", "memory://").await }) + .unwrap(); let io = Rc::new(TvixStoreIO::new( - blob_service.clone(), - directory_service.clone(), - path_info_service, + blob_service, + directory_service, + path_info_service.into(), + nar_calculation_service.into(), Arc::<DummyBuildService>::default(), - runtime.handle().clone(), + tokio_runtime.handle().clone(), )); let mut eval = tvix_eval::Evaluation::new(io.clone() as Rc<dyn EvalIO>, true); diff --git a/tvix/nar-bridge/pkg/http/narinfo_get.go b/tvix/nar-bridge/pkg/http/narinfo_get.go index 98d85744d8..d43cb58078 100644 --- a/tvix/nar-bridge/pkg/http/narinfo_get.go +++ b/tvix/nar-bridge/pkg/http/narinfo_get.go @@ -96,37 +96,42 @@ func renderNarinfo( } func registerNarinfoGet(s *Server) { - // GET $outHash.narinfo looks up the PathInfo from the tvix-store, - // and then render a .narinfo file to the client. 
-	// It will keep the PathInfo in the lookup map,
-	// so a subsequent GET /nar/ $narhash.nar request can find it.
-	s.handler.Get("/{outputhash:^["+nixbase32.Alphabet+"]{32}}.narinfo", func(w http.ResponseWriter, r *http.Request) {
-		defer r.Body.Close()
-
-		ctx := r.Context()
-		log := log.WithField("outputhash", chi.URLParamFromCtx(ctx, "outputhash"))
-
-		// parse the output hash sent in the request URL
-		outputHash, err := nixbase32.DecodeString(chi.URLParamFromCtx(ctx, "outputhash"))
-		if err != nil {
-			log.WithError(err).Error("unable to decode output hash from url")
-			w.WriteHeader(http.StatusBadRequest)
-			_, err := w.Write([]byte("unable to decode output hash from url"))
+	// GET/HEAD $outHash.narinfo looks up the PathInfo from the tvix-store,
+	// and, if it's a GET request, renders a .narinfo file to the client.
+	// In both cases it will keep the PathInfo in the lookup map,
+	// so a subsequent GET/HEAD /nar/ $narhash.nar request can find it.
+	genNarinfoHandler := func(isHead bool) func(w http.ResponseWriter, r *http.Request) {
+		return func(w http.ResponseWriter, r *http.Request) {
+			defer r.Body.Close()
+
+			ctx := r.Context()
+			log := log.WithField("outputhash", chi.URLParamFromCtx(ctx, "outputhash"))
+
+			// parse the output hash sent in the request URL
+			outputHash, err := nixbase32.DecodeString(chi.URLParamFromCtx(ctx, "outputhash"))
 			if err != nil {
-				log.WithError(err).Errorf("unable to write error message to client")
+				log.WithError(err).Error("unable to decode output hash from url")
+				w.WriteHeader(http.StatusBadRequest)
+				_, err := w.Write([]byte("unable to decode output hash from url"))
+				if err != nil {
+					log.WithError(err).Errorf("unable to write error message to client")
+				}
+
+				return
 			}
 
-			return
-		}
-
-		err = renderNarinfo(ctx, log, s.pathInfoServiceClient, &s.narDbMu, s.narDb, outputHash, w, false)
-		if err != nil {
-			if errors.Is(err, fs.ErrNotExist) {
-				w.WriteHeader(http.StatusNotFound)
-			} else {
-				log.WithError(err).Warn("unable to render narinfo")
-				w.WriteHeader(http.StatusInternalServerError)
+			err = renderNarinfo(ctx, log, s.pathInfoServiceClient, &s.narDbMu, s.narDb, outputHash, w, isHead)
+			if err != nil {
+				if errors.Is(err, fs.ErrNotExist) {
+					w.WriteHeader(http.StatusNotFound)
+				} else {
+					log.WithError(err).Warn("unable to render narinfo")
+					w.WriteHeader(http.StatusInternalServerError)
+				}
 			}
 		}
-	})
+	}
+
+	s.handler.Get("/{outputhash:^["+nixbase32.Alphabet+"]{32}}.narinfo", genNarinfoHandler(false))
+	s.handler.Head("/{outputhash:^["+nixbase32.Alphabet+"]{32}}.narinfo", genNarinfoHandler(true))
 }
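The hunks that follow rename `derivation_or_fod_hash` to `hash_derivation_modulo` and tighten its contract: the caller supplies an infallible lookup for the precomputed hashes of input derivations. A hedged sketch of such a caller, with a plain `HashMap` cache standing in for tvix's `KnownPaths`:

```rust
use std::collections::HashMap;

use nix_compat::derivation::Derivation;

// Illustrative helper: compute hash_derivation_modulo for `drv`, resolving
// input derivations from a cache keyed by their store path rendering.
// The lookup closure must be infallible, hence the expect().
fn hash_modulo(drv: &Derivation, cache: &HashMap<String, [u8; 32]>) -> [u8; 32] {
    drv.hash_derivation_modulo(|drv_path| {
        *cache
            .get(&drv_path.to_string())
            .expect("input derivation must have been processed first")
    })
}
```

For fixed-output derivations the closure is never invoked, which is why the test fixtures further down pass `|_| panic!("must not be called")`.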
diff --git a/tvix/nix-compat/src/derivation/mod.rs b/tvix/nix-compat/src/derivation/mod.rs
index 07da127ed0..6e12e3ea86 100644
--- a/tvix/nix-compat/src/derivation/mod.rs
+++ b/tvix/nix-compat/src/derivation/mod.rs
@@ -188,11 +188,12 @@ impl Derivation {
     /// `fixed:out:${algo}:${digest}:${fodPath}` string is hashed instead of
     /// the A-Term.
     ///
-    /// If the derivation is not a fixed derivation, it's up to the caller of
-    /// this function to provide a lookup function to lookup these calculation
-    /// results of parent derivations at `fn_get_derivation_or_fod_hash` (by
-    /// drv path).
-    pub fn derivation_or_fod_hash<F>(&self, fn_get_derivation_or_fod_hash: F) -> [u8; 32]
+    /// It's up to the caller of this function to provide an (infallible) lookup
+    /// function to query [hash_derivation_modulo] of direct input derivations,
+    /// by their [StorePathRef].
+    /// It will only be called in case the derivation is not a fixed-output
+    /// derivation.
+    pub fn hash_derivation_modulo<F>(&self, fn_lookup_hash_derivation_modulo: F) -> [u8; 32]
     where
         F: Fn(&StorePathRef) -> [u8; 32],
     {
@@ -200,16 +201,16 @@ impl Derivation {
         // Non-Fixed-output derivations return the sha256 digest of the ATerm
         // notation, but with all input_derivation paths replaced by a recursive
         // call to this function.
-        // We use fn_get_derivation_or_fod_hash here, so callers can precompute this.
+        // We call [fn_lookup_hash_derivation_modulo] rather than recursing
+        // ourselves, so callers can precompute this.
         self.fod_digest().unwrap_or({
-            // For each input_derivation, look up the
-            // derivation_or_fod_hash, and replace the derivation path with
-            // it's HEXLOWER digest.
+            // For each input_derivation, look up the hash derivation modulo,
+            // and replace the derivation path in the aterm with its HEXLOWER digest.
             let aterm_bytes = self.to_aterm_bytes_with_replacements(&BTreeMap::from_iter(
                 self.input_derivations
                     .iter()
                     .map(|(drv_path, output_names)| {
-                        let hash = fn_get_derivation_or_fod_hash(&drv_path.into());
+                        let hash = fn_lookup_hash_derivation_modulo(&drv_path.into());
 
                         (hash, output_names.to_owned())
                     }),
@@ -226,20 +227,22 @@ impl Derivation {
     /// and self.environment[$outputName] needs to be an empty string.
     ///
     /// Output path calculation requires knowledge of the
-    /// derivation_or_fod_hash [NixHash], which (in case of non-fixed-output
-    /// derivations) also requires knowledge of other hash_derivation_modulo
-    /// [NixHash]es.
+    /// [hash_derivation_modulo], which (in case of non-fixed-output
+    /// derivations) also requires knowledge of the [hash_derivation_modulo] of
+    /// input derivations (recursively).
     ///
-    /// We solve this by asking the caller of this function to provide the
-    /// hash_derivation_modulo of the current Derivation.
+    /// To avoid recursing and doing unnecessary calculation, we simply
+    /// ask the caller of this function to provide the result of the
+    /// [hash_derivation_modulo] call of the current [Derivation],
+    /// and leave it up to them to calculate it when needed.
     ///
-    /// On completion, self.environment[$outputName] and
-    /// self.outputs[$outputName].path are set to the calculated output path for all
+    /// On completion, `self.environment[$outputName]` and
+    /// `self.outputs[$outputName].path` are set to the calculated output path for all
     /// outputs.
     pub fn calculate_output_paths(
         &mut self,
         name: &str,
-        derivation_or_fod_hash: &[u8; 32],
+        hash_derivation_modulo: &[u8; 32],
     ) -> Result<(), DerivationError> {
         // The fingerprint and hash differs per output
         for (output_name, output) in self.outputs.iter_mut() {
             let path_name = output_path_name(name, output_name);
 
-            // For fixed output derivation we use the per-output info, otherwise we use the
-            // derivation hash.
+            // For fixed-output derivations we use [build_ca_path], otherwise we
+            // use [build_output_path] with [hash_derivation_modulo].
             let abs_store_path = if let Some(ref hwm) = output.ca_hash {
                 build_ca_path(&path_name, hwm, Vec::<String>::new(), false).map_err(|e| {
                     DerivationError::InvalidOutputDerivationPath(output_name.to_string(), e)
                 })?
} else { - build_output_path(derivation_or_fod_hash, output_name, &path_name).map_err(|e| { + build_output_path(hash_derivation_modulo, output_name, &path_name).map_err(|e| { DerivationError::InvalidOutputDerivationPath( output_name.to_string(), store_path::BuildStorePathError::InvalidStorePath(e), diff --git a/tvix/nix-compat/src/derivation/tests/mod.rs b/tvix/nix-compat/src/derivation/tests/mod.rs index 63a65356bd..48d4e8926a 100644 --- a/tvix/nix-compat/src/derivation/tests/mod.rs +++ b/tvix/nix-compat/src/derivation/tests/mod.rs @@ -164,7 +164,7 @@ fn derivation_path(#[case] name: &str, #[case] expected_path: &str) { /// This trims all output paths from a Derivation struct, /// by setting outputs[$outputName].path and environment[$outputName] to the empty string. -fn derivation_with_trimmed_output_paths(derivation: &Derivation) -> Derivation { +fn derivation_without_output_paths(derivation: &Derivation) -> Derivation { let mut trimmed_env = derivation.environment.clone(); let mut trimmed_outputs = derivation.outputs.clone(); @@ -191,13 +191,13 @@ fn derivation_with_trimmed_output_paths(derivation: &Derivation) -> Derivation { #[rstest] #[case::fixed_sha256("0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv", hex!("724f3e3634fce4cbbbd3483287b8798588e80280660b9a63fd13a1bc90485b33"))] #[case::fixed_sha1("ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv", hex!("c79aebd0ce3269393d4a1fde2cbd1d975d879b40f0bf40a48f550edc107fd5df"))] -fn derivation_or_fod_hash(#[case] drv_path: &str, #[case] expected_digest: [u8; 32]) { +fn hash_derivation_modulo_fixed(#[case] drv_path: &str, #[case] expected_digest: [u8; 32]) { // read in the fixture let json_bytes = fs::read(format!("{}/ok/{}.json", RESOURCES_PATHS, drv_path)).expect("unable to read JSON"); let drv: Derivation = serde_json::from_slice(&json_bytes).expect("must deserialize"); - let actual = drv.derivation_or_fod_hash(|_| panic!("must not be called")); + let actual = drv.hash_derivation_modulo(|_| panic!("must not be called")); assert_eq!(expected_digest, actual); } @@ -224,13 +224,13 @@ fn output_paths(#[case] name: &str, #[case] drv_path_str: &str) { ) .expect("must succeed"); - // create a version with trimmed output paths, simulating we constructed - // the struct. - let mut derivation = derivation_with_trimmed_output_paths(&expected_derivation); + // create a version without output paths, simulating we constructed the + // struct. + let mut derivation = derivation_without_output_paths(&expected_derivation); - // calculate the derivation_or_fod_hash of derivation + // calculate the hash_derivation_modulo of Derivation // We don't expect the lookup function to be called for most derivations. - let calculated_derivation_or_fod_hash = derivation.derivation_or_fod_hash(|parent_drv_path| { + let actual_hash_derivation_modulo = derivation.hash_derivation_modulo(|parent_drv_path| { // 4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv may lookup /nix/store/0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv // ch49594n9avinrf8ip0aslidkc4lxkqv-foo.drv may lookup /nix/store/ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv if name == "foo" @@ -255,9 +255,9 @@ fn output_paths(#[case] name: &str, #[case] drv_path_str: &str) { let drv: Derivation = serde_json::from_slice(&json_bytes).expect("must deserialize"); - // calculate derivation_or_fod_hash for each parent. + // calculate hash_derivation_modulo for each parent. // This may not trigger subsequent requests, as both parents are FOD. 
- drv.derivation_or_fod_hash(|_| panic!("must not lookup")) + drv.hash_derivation_modulo(|_| panic!("must not lookup")) } else { // we only expect this to be called in the "foo" testcase, for the "bar derivations" panic!("may only be called for foo testcase on bar derivations"); @@ -265,7 +265,7 @@ fn output_paths(#[case] name: &str, #[case] drv_path_str: &str) { }); derivation - .calculate_output_paths(name, &calculated_derivation_or_fod_hash) + .calculate_output_paths(name, &actual_hash_derivation_modulo) .unwrap(); // The derivation should now look like it was before @@ -343,7 +343,7 @@ fn output_path_construction() { // calculate bar output paths let bar_calc_result = bar_drv.calculate_output_paths( "bar", - &bar_drv.derivation_or_fod_hash(|_| panic!("is FOD, should not lookup")), + &bar_drv.hash_derivation_modulo(|_| panic!("is FOD, should not lookup")), ); assert!(bar_calc_result.is_ok()); @@ -360,8 +360,8 @@ fn output_path_construction() { // now construct foo, which requires bar_drv // Note how we refer to the output path, drv name and replacement_str (with calculated output paths) of bar. let bar_output_path = &bar_drv.outputs.get("out").expect("must exist").path; - let bar_drv_derivation_or_fod_hash = - bar_drv.derivation_or_fod_hash(|_| panic!("is FOD, should not lookup")); + let bar_drv_hash_derivation_modulo = + bar_drv.hash_derivation_modulo(|_| panic!("is FOD, should not lookup")); let bar_drv_path = bar_drv .calculate_derivation_path("bar") @@ -408,11 +408,11 @@ fn output_path_construction() { // calculate foo output paths let foo_calc_result = foo_drv.calculate_output_paths( "foo", - &foo_drv.derivation_or_fod_hash(|drv_path| { + &foo_drv.hash_derivation_modulo(|drv_path| { if drv_path.to_string() != "0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv" { panic!("lookup called with unexpected drv_path: {}", drv_path); } - bar_drv_derivation_or_fod_hash + bar_drv_hash_derivation_modulo }), ); assert!(foo_calc_result.is_ok()); diff --git a/tvix/nix-compat/src/nar/mod.rs b/tvix/nix-compat/src/nar/mod.rs index 058977f4fc..c678d26ffb 100644 --- a/tvix/nix-compat/src/nar/mod.rs +++ b/tvix/nix-compat/src/nar/mod.rs @@ -1,4 +1,4 @@ -mod wire; +pub(crate) mod wire; pub mod reader; pub mod writer; diff --git a/tvix/nix-compat/src/nar/reader/async/mod.rs b/tvix/nix-compat/src/nar/reader/async/mod.rs new file mode 100644 index 0000000000..0808fba38c --- /dev/null +++ b/tvix/nix-compat/src/nar/reader/async/mod.rs @@ -0,0 +1,173 @@ +use std::{ + mem::MaybeUninit, + pin::Pin, + task::{self, Poll}, +}; + +use tokio::io::{self, AsyncBufRead, AsyncRead, ErrorKind::InvalidData}; + +// Required reading for understanding this module. +use crate::{ + nar::{self, wire::PadPar}, + wire::{self, BytesReader}, +}; + +mod read; +#[cfg(test)] +mod test; + +pub type Reader<'a> = dyn AsyncBufRead + Unpin + Send + 'a; + +/// Start reading a NAR file from `reader`. +pub async fn open<'a, 'r>(reader: &'a mut Reader<'r>) -> io::Result<Node<'a, 'r>> { + read::token(reader, &nar::wire::TOK_NAR).await?; + Node::new(reader).await +} + +pub enum Node<'a, 'r: 'a> { + Symlink { + target: Vec<u8>, + }, + File { + executable: bool, + reader: FileReader<'a, 'r>, + }, + Directory(DirReader<'a, 'r>), +} + +impl<'a, 'r: 'a> Node<'a, 'r> { + /// Start reading a [Node], matching the next [wire::Node]. + /// + /// Reading the terminating [wire::TOK_PAR] is done immediately for [Node::Symlink], + /// but is otherwise left to [DirReader] or [BytesReader]. 
+ async fn new(reader: &'a mut Reader<'r>) -> io::Result<Self> { + Ok(match read::tag(reader).await? { + nar::wire::Node::Sym => { + let target = wire::read_bytes(reader, 1..=nar::wire::MAX_TARGET_LEN).await?; + + if target.contains(&0) { + return Err(InvalidData.into()); + } + + read::token(reader, &nar::wire::TOK_PAR).await?; + + Node::Symlink { target } + } + tag @ (nar::wire::Node::Reg | nar::wire::Node::Exe) => Node::File { + executable: tag == nar::wire::Node::Exe, + reader: FileReader { + inner: BytesReader::new_internal(reader, ..).await?, + }, + }, + nar::wire::Node::Dir => Node::Directory(DirReader::new(reader)), + }) + } +} + +/// File contents, readable through the [AsyncRead] trait. +/// +/// It comes with some caveats: +/// * You must always read the entire file, unless you intend to abandon the entire archive reader. +/// * You must abandon the entire archive reader upon the first error. +/// +/// It's fine to read exactly `reader.len()` bytes without ever seeing an explicit EOF. +pub struct FileReader<'a, 'r> { + inner: BytesReader<&'a mut Reader<'r>, PadPar>, +} + +impl<'a, 'r> FileReader<'a, 'r> { + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + pub fn len(&self) -> u64 { + self.inner.len() + } +} + +impl<'a, 'r> AsyncRead for FileReader<'a, 'r> { + fn poll_read( + self: Pin<&mut Self>, + cx: &mut task::Context, + buf: &mut io::ReadBuf, + ) -> Poll<io::Result<()>> { + Pin::new(&mut self.get_mut().inner).poll_read(cx, buf) + } +} + +impl<'a, 'r> AsyncBufRead for FileReader<'a, 'r> { + fn poll_fill_buf(self: Pin<&mut Self>, cx: &mut task::Context) -> Poll<io::Result<&[u8]>> { + Pin::new(&mut self.get_mut().inner).poll_fill_buf(cx) + } + + fn consume(self: Pin<&mut Self>, amt: usize) { + Pin::new(&mut self.get_mut().inner).consume(amt) + } +} + +/// A directory iterator, yielding a sequence of [Node]s. +/// It must be fully consumed before reading further from the [DirReader] that produced it, if any. +pub struct DirReader<'a, 'r> { + reader: &'a mut Reader<'r>, + /// Previous directory entry name. + /// We have to hang onto this to enforce name monotonicity. + prev_name: Vec<u8>, +} + +pub struct Entry<'a, 'r> { + pub name: &'a [u8], + pub node: Node<'a, 'r>, +} + +impl<'a, 'r> DirReader<'a, 'r> { + fn new(reader: &'a mut Reader<'r>) -> Self { + Self { + reader, + prev_name: vec![], + } + } + + /// Read the next [Entry] from the directory. + /// + /// We explicitly don't implement [Iterator], since treating this as + /// a regular Rust iterator will surely lead you astray. + /// + /// * You must always consume the entire iterator, unless you abandon the entire archive reader. + /// * You must abandon the entire archive reader on the first error. + /// * You must abandon the directory reader upon the first [None]. + /// * Even if you know the amount of elements up front, you must keep reading until you encounter [None]. + pub async fn next(&mut self) -> io::Result<Option<Entry<'_, 'r>>> { + // COME FROM the previous iteration: if we've already read an entry, + // read its terminating TOK_PAR here. + if !self.prev_name.is_empty() { + read::token(self.reader, &nar::wire::TOK_PAR).await?; + } + + if let nar::wire::Entry::None = read::tag(self.reader).await? { + return Ok(None); + } + + let mut name = [MaybeUninit::uninit(); nar::wire::MAX_NAME_LEN + 1]; + let name = + wire::read_bytes_buf(self.reader, &mut name, 1..=nar::wire::MAX_NAME_LEN).await?; + + if name.contains(&0) || name.contains(&b'/') || name == b"." || name == b".." 
{ + return Err(InvalidData.into()); + } + + // Enforce strict monotonicity of directory entry names. + if &self.prev_name[..] >= name { + return Err(InvalidData.into()); + } + + self.prev_name.clear(); + self.prev_name.extend_from_slice(name); + + read::token(self.reader, &nar::wire::TOK_NOD).await?; + + Ok(Some(Entry { + name: &self.prev_name, + node: Node::new(self.reader).await?, + })) + } +} diff --git a/tvix/nix-compat/src/nar/reader/async/read.rs b/tvix/nix-compat/src/nar/reader/async/read.rs new file mode 100644 index 0000000000..2adf894922 --- /dev/null +++ b/tvix/nix-compat/src/nar/reader/async/read.rs @@ -0,0 +1,69 @@ +use tokio::io::{ + self, AsyncReadExt, + ErrorKind::{InvalidData, UnexpectedEof}, +}; + +use crate::nar::wire::Tag; + +use super::Reader; + +/// Consume a known token from the reader. +pub async fn token<const N: usize>(reader: &mut Reader<'_>, token: &[u8; N]) -> io::Result<()> { + let mut buf = [0u8; N]; + + // This implements something similar to [AsyncReadExt::read_exact], but verifies that + // the input data matches the token while we read it. These two slices respectively + // represent the remaining token to be verified, and the remaining input buffer. + let mut token = &token[..]; + let mut buf = &mut buf[..]; + + while !token.is_empty() { + match reader.read(buf).await? { + 0 => { + return Err(UnexpectedEof.into()); + } + n => { + let (t, b); + (t, token) = token.split_at(n); + (b, buf) = buf.split_at_mut(n); + + if t != b { + return Err(InvalidData.into()); + } + } + } + } + + Ok(()) +} + +/// Consume a [Tag] from the reader. +pub async fn tag<T: Tag>(reader: &mut Reader<'_>) -> io::Result<T> { + let mut buf = T::make_buf(); + let buf = buf.as_mut(); + + // first read the known minimum length… + reader.read_exact(&mut buf[..T::MIN]).await?; + + // then decide which tag we're expecting + let tag = T::from_u8(buf[T::OFF]).ok_or(InvalidData)?; + let (head, tail) = tag.as_bytes().split_at(T::MIN); + + // make sure what we've read so far is valid + if buf[..T::MIN] != *head { + return Err(InvalidData.into()); + } + + // …then read the rest, if any + if !tail.is_empty() { + let rest = tail.len(); + reader.read_exact(&mut buf[..rest]).await?; + + // and make sure it's what we expect + if buf[..rest] != *tail { + return Err(InvalidData.into()); + } + } + + Ok(tag) +} diff --git a/tvix/nix-compat/src/nar/reader/async/test.rs b/tvix/nix-compat/src/nar/reader/async/test.rs new file mode 100644 index 0000000000..7bc1f8942f --- /dev/null +++ b/tvix/nix-compat/src/nar/reader/async/test.rs @@ -0,0 +1,310 @@ +use tokio::io::AsyncReadExt; + +mod nar { + pub use crate::nar::reader::r#async as reader; +} + +#[tokio::test] +async fn symlink() { + let mut f = std::io::Cursor::new(include_bytes!("../../tests/symlink.nar")); + let node = nar::reader::open(&mut f).await.unwrap(); + + match node { + nar::reader::Node::Symlink { target } => { + assert_eq!( + &b"/nix/store/somewhereelse"[..], + &target, + "target must match" + ); + } + _ => panic!("unexpected type"), + } +} + +#[tokio::test] +async fn file() { + let mut f = std::io::Cursor::new(include_bytes!("../../tests/helloworld.nar")); + let node = nar::reader::open(&mut f).await.unwrap(); + + match node { + nar::reader::Node::File { + executable, + mut reader, + } => { + assert!(!executable); + let mut buf = vec![]; + reader + .read_to_end(&mut buf) + .await + .expect("read must succeed"); + assert_eq!(&b"Hello World!"[..], &buf); + } + _ => panic!("unexpected type"), + } +} + +#[tokio::test] +async fn complicated() { + let 
mut f = std::io::Cursor::new(include_bytes!("../../tests/complicated.nar")); + let node = nar::reader::open(&mut f).await.unwrap(); + + match node { + nar::reader::Node::Directory(mut dir_reader) => { + // first entry is .keep, an empty regular file. + must_read_file( + ".keep", + dir_reader + .next() + .await + .expect("next must succeed") + .expect("must be some"), + ) + .await; + + // second entry is aa, a symlink to /nix/store/somewhereelse + must_be_symlink( + "aa", + "/nix/store/somewhereelse", + dir_reader + .next() + .await + .expect("next must be some") + .expect("must be some"), + ); + + { + // third entry is a directory called "keep" + let entry = dir_reader + .next() + .await + .expect("next must be some") + .expect("must be some"); + + assert_eq!(b"keep", entry.name); + + match entry.node { + nar::reader::Node::Directory(mut subdir_reader) => { + { + // first entry is .keep, an empty regular file. + let entry = subdir_reader + .next() + .await + .expect("next must succeed") + .expect("must be some"); + + must_read_file(".keep", entry).await; + } + + // we must read the None + assert!( + subdir_reader + .next() + .await + .expect("next must succeed") + .is_none(), + "keep directory contains only .keep" + ); + } + _ => panic!("unexpected type for keep/.keep"), + } + }; + + // reading more entries yields None (and we actually must read until this) + assert!(dir_reader.next().await.expect("must succeed").is_none()); + } + _ => panic!("unexpected type"), + } +} + +#[tokio::test] +#[should_panic] +#[ignore = "TODO: async poisoning"] +async fn file_read_abandoned() { + let mut f = std::io::Cursor::new(include_bytes!("../../tests/complicated.nar")); + let node = nar::reader::open(&mut f).await.unwrap(); + + match node { + nar::reader::Node::Directory(mut dir_reader) => { + // first entry is .keep, an empty regular file. + { + let entry = dir_reader + .next() + .await + .expect("next must succeed") + .expect("must be some"); + + assert_eq!(b".keep", entry.name); + // don't bother to finish reading it. + }; + + // this should panic (not return an error), because we are meant to abandon the archive reader now. + assert!(dir_reader.next().await.expect("must succeed").is_none()); + } + _ => panic!("unexpected type"), + } +} + +#[tokio::test] +#[should_panic] +#[ignore = "TODO: async poisoning"] +async fn dir_read_abandoned() { + let mut f = std::io::Cursor::new(include_bytes!("../../tests/complicated.nar")); + let node = nar::reader::open(&mut f).await.unwrap(); + + match node { + nar::reader::Node::Directory(mut dir_reader) => { + // first entry is .keep, an empty regular file. 
+ must_read_file( + ".keep", + dir_reader + .next() + .await + .expect("next must succeed") + .expect("must be some"), + ) + .await; + + // second entry is aa, a symlink to /nix/store/somewhereelse + must_be_symlink( + "aa", + "/nix/store/somewhereelse", + dir_reader + .next() + .await + .expect("next must be some") + .expect("must be some"), + ); + + { + // third entry is a directory called "keep" + let entry = dir_reader + .next() + .await + .expect("next must be some") + .expect("must be some"); + + assert_eq!(b"keep", entry.name); + + match entry.node { + nar::reader::Node::Directory(_) => { + // don't finish using it, which poisons the archive reader + } + _ => panic!("unexpected type for keep/.keep"), + } + }; + + // this should panic, because we didn't finish reading the child subdirectory + assert!(dir_reader.next().await.expect("must succeed").is_none()); + } + _ => panic!("unexpected type"), + } +} + +#[tokio::test] +#[should_panic] +#[ignore = "TODO: async poisoning"] +async fn dir_read_after_none() { + let mut f = std::io::Cursor::new(include_bytes!("../../tests/complicated.nar")); + let node = nar::reader::open(&mut f).await.unwrap(); + + match node { + nar::reader::Node::Directory(mut dir_reader) => { + // first entry is .keep, an empty regular file. + must_read_file( + ".keep", + dir_reader + .next() + .await + .expect("next must succeed") + .expect("must be some"), + ) + .await; + + // second entry is aa, a symlink to /nix/store/somewhereelse + must_be_symlink( + "aa", + "/nix/store/somewhereelse", + dir_reader + .next() + .await + .expect("next must be some") + .expect("must be some"), + ); + + { + // third entry is a directory called "keep" + let entry = dir_reader + .next() + .await + .expect("next must be some") + .expect("must be some"); + + assert_eq!(b"keep", entry.name); + + match entry.node { + nar::reader::Node::Directory(mut subdir_reader) => { + // first entry is .keep, an empty regular file. + must_read_file( + ".keep", + subdir_reader + .next() + .await + .expect("next must succeed") + .expect("must be some"), + ) + .await; + + // we must read the None + assert!( + subdir_reader + .next() + .await + .expect("next must succeed") + .is_none(), + "keep directory contains only .keep" + ); + } + _ => panic!("unexpected type for keep/.keep"), + } + }; + + // reading more entries yields None (and we actually must read until this) + assert!(dir_reader.next().await.expect("must succeed").is_none()); + + // this should panic, because we already got a none so we're meant to stop. 
+ dir_reader.next().await.unwrap(); + unreachable!() + } + _ => panic!("unexpected type"), + } +} + +async fn must_read_file(name: &'static str, entry: nar::reader::Entry<'_, '_>) { + assert_eq!(name.as_bytes(), entry.name); + + match entry.node { + nar::reader::Node::File { + executable, + mut reader, + } => { + assert!(!executable); + assert_eq!(reader.read(&mut [0]).await.unwrap(), 0); + } + _ => panic!("unexpected type for {}", name), + } +} + +fn must_be_symlink( + name: &'static str, + exp_target: &'static str, + entry: nar::reader::Entry<'_, '_>, +) { + assert_eq!(name.as_bytes(), entry.name); + + match entry.node { + nar::reader::Node::Symlink { target } => { + assert_eq!(exp_target.as_bytes(), &target); + } + _ => panic!("unexpected type for {}", name), + } +} diff --git a/tvix/nix-compat/src/nar/reader/mod.rs b/tvix/nix-compat/src/nar/reader/mod.rs index 75463a6450..9e9237ead3 100644 --- a/tvix/nix-compat/src/nar/reader/mod.rs +++ b/tvix/nix-compat/src/nar/reader/mod.rs @@ -10,9 +10,15 @@ use std::io::{ Read, Write, }; +#[cfg(not(debug_assertions))] +use std::marker::PhantomData; + // Required reading for understanding this module. use crate::nar::wire; +#[cfg(feature = "async")] +pub mod r#async; + mod read; #[cfg(test)] mod test; @@ -27,25 +33,15 @@ struct ArchiveReader<'a, 'r> { /// * An error is encountered at any point /// * A file or directory reader is dropped before being read entirely. /// All of these checks vanish in release mode. - #[cfg(debug_assertions)] status: ArchiveReaderStatus<'a>, } -macro_rules! poison { - ($it:expr) => { - #[cfg(debug_assertions)] - { - $it.status.poison(); - } - }; -} - macro_rules! try_or_poison { ($it:expr, $ex:expr) => { match $ex { Ok(x) => x, Err(e) => { - poison!($it); + $it.status.poison(); return Err(e.into()); } } @@ -56,11 +52,7 @@ pub fn open<'a, 'r>(reader: &'a mut Reader<'r>) -> io::Result<Node<'a, 'r>> { read::token(reader, &wire::TOK_NAR)?; Node::new(ArchiveReader { inner: reader, - #[cfg(debug_assertions)] - status: ArchiveReaderStatus::StackTop { - poisoned: false, - ready: true, - }, + status: ArchiveReaderStatus::top(), }) } @@ -80,7 +72,6 @@ impl<'a, 'r> Node<'a, 'r> { /// /// Reading the terminating [wire::TOK_PAR] is done immediately for [Node::Symlink], /// but is otherwise left to [DirReader] or [FileReader]. - #[allow(unused_mut)] // due to debug_assertions code fn new(mut reader: ArchiveReader<'a, 'r>) -> io::Result<Self> { Ok(match read::tag(reader.inner)? { wire::Node::Sym => { @@ -88,15 +79,12 @@ impl<'a, 'r> Node<'a, 'r> { try_or_poison!(reader, read::bytes(reader.inner, wire::MAX_TARGET_LEN)); if target.is_empty() || target.contains(&0) { - poison!(reader); + reader.status.poison(); return Err(InvalidData.into()); } try_or_poison!(reader, read::token(reader.inner, &wire::TOK_PAR)); - #[cfg(debug_assertions)] - { - reader.status.ready_parent(); // Immediately allow reading from parent again - } + reader.status.ready_parent(); // Immediately allow reading from parent again Node::Symlink { target } } @@ -131,17 +119,13 @@ pub struct FileReader<'a, 'r> { impl<'a, 'r> FileReader<'a, 'r> { /// Instantiate a new reader, starting after [wire::TOK_REG] or [wire::TOK_EXE]. /// We handle the terminating [wire::TOK_PAR] on semantic EOF. 
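// An illustrative sketch (not from this patch) of the bytes FileReader walks,
// assuming the usual NAR string framing: u64 little-endian length, data, then
// zero padding up to the next 8-byte boundary. For a 5-byte file "hello":
//
//   05 00 00 00 00 00 00 00          length = 5
//   68 65 6c 6c 6f 00 00 00          "hello" + 3 bytes of zero padding
//   01 00 00 00 00 00 00 00 29 00..  TOK_PAR, the ")" closing the node
//
// Only the 5 payload bytes are handed to the caller; the padding and TOK_PAR
// are verified at semantic EOF, as described above.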
- #[allow(unused_mut)] // due to debug_assertions code fn new(mut reader: ArchiveReader<'a, 'r>, len: u64) -> io::Result<Self> { // For zero-length files, we have to read the terminating TOK_PAR // immediately, since FileReader::read may never be called; we've // already reached semantic EOF by definition. if len == 0 { read::token(reader.inner, &wire::TOK_PAR)?; - #[cfg(debug_assertions)] - { - reader.status.ready_parent(); - } + reader.status.ready_parent(); } Ok(Self { @@ -175,7 +159,7 @@ impl FileReader<'_, '_> { let mut buf = try_or_poison!(self.reader, self.reader.inner.fill_buf()); if buf.is_empty() { - poison!(self.reader); + self.reader.status.poison(); return Err(UnexpectedEof.into()); } @@ -237,7 +221,7 @@ impl Read for FileReader<'_, '_> { self.len -= n as u64; if n == 0 { - poison!(self.reader); + self.reader.status.poison(); return Err(UnexpectedEof.into()); } @@ -260,18 +244,15 @@ impl FileReader<'_, '_> { try_or_poison!(self.reader, self.reader.inner.read_exact(&mut buf[pad..])); if buf != [0; 8] { - poison!(self.reader); + self.reader.status.poison(); return Err(InvalidData.into()); } } try_or_poison!(self.reader, read::token(self.reader.inner, &wire::TOK_PAR)); - #[cfg(debug_assertions)] - { - // Done with reading this file, allow going back up the chain of readers - self.reader.status.ready_parent(); - } + // Done with reading this file, allow going back up the chain of readers + self.reader.status.ready_parent(); Ok(()) } @@ -283,11 +264,11 @@ pub struct DirReader<'a, 'r> { reader: ArchiveReader<'a, 'r>, /// Previous directory entry name. /// We have to hang onto this to enforce name monotonicity. - prev_name: Option<Vec<u8>>, + prev_name: Vec<u8>, } pub struct Entry<'a, 'r> { - pub name: Vec<u8>, + pub name: &'a [u8], pub node: Node<'a, 'r>, } @@ -295,7 +276,7 @@ impl<'a, 'r> DirReader<'a, 'r> { fn new(reader: ArchiveReader<'a, 'r>) -> Self { Self { reader, - prev_name: None, + prev_name: vec![], } } @@ -314,23 +295,21 @@ impl<'a, 'r> DirReader<'a, 'r> { // COME FROM the previous iteration: if we've already read an entry, // read its terminating TOK_PAR here. - if self.prev_name.is_some() { + if !self.prev_name.is_empty() { try_or_poison!(self.reader, read::token(self.reader.inner, &wire::TOK_PAR)); } // Determine if there are more entries to follow if let wire::Entry::None = try_or_poison!(self.reader, read::tag(self.reader.inner)) { // We've reached the end of this directory. - #[cfg(debug_assertions)] - { - self.reader.status.ready_parent(); - } + self.reader.status.ready_parent(); return Ok(None); } + let mut name = [0; wire::MAX_NAME_LEN + 1]; let name = try_or_poison!( self.reader, - read::bytes(self.reader.inner, wire::MAX_NAME_LEN) + read::bytes_buf(self.reader.inner, &mut name, wire::MAX_NAME_LEN) ); if name.is_empty() @@ -339,29 +318,23 @@ impl<'a, 'r> DirReader<'a, 'r> { || name == b"." || name == b".." { - poison!(self.reader); + self.reader.status.poison(); return Err(InvalidData.into()); } // Enforce strict monotonicity of directory entry names. - match &mut self.prev_name { - None => { - self.prev_name = Some(name.clone()); - } - Some(prev_name) => { - if *prev_name >= name { - poison!(self.reader); - return Err(InvalidData.into()); - } - - name[..].clone_into(prev_name); - } + if &self.prev_name[..] 
>= name { + self.reader.status.poison(); + return Err(InvalidData.into()); } + self.prev_name.clear(); + self.prev_name.extend_from_slice(name); + try_or_poison!(self.reader, read::token(self.reader.inner, &wire::TOK_NOD)); Ok(Some(Entry { - name, + name: &self.prev_name, // Don't need to worry about poisoning here: Node::new will do it for us if needed node: Node::new(self.reader.child())?, })) @@ -373,12 +346,12 @@ impl<'a, 'r> DirReader<'a, 'r> { /// so we can check they are abandoned when an error occurs /// * Make sure only the most recently created object is read from, and is fully exhausted /// before anything it was created from is used again. -#[cfg(debug_assertions)] enum ArchiveReaderStatus<'a> { - StackTop { - poisoned: bool, - ready: bool, - }, + #[cfg(not(debug_assertions))] + None(PhantomData<&'a ()>), + #[cfg(debug_assertions)] + StackTop { poisoned: bool, ready: bool }, + #[cfg(debug_assertions)] StackChild { poisoned: &'a mut bool, parent_ready: &'a mut bool, @@ -386,12 +359,28 @@ enum ArchiveReaderStatus<'a> { }, } -#[cfg(debug_assertions)] impl ArchiveReaderStatus<'_> { + fn top() -> Self { + #[cfg(debug_assertions)] + { + ArchiveReaderStatus::StackTop { + poisoned: false, + ready: true, + } + } + + #[cfg(not(debug_assertions))] + ArchiveReaderStatus::None(PhantomData) + } + /// Poison all the objects sharing the same reader, to be used when an error occurs fn poison(&mut self) { match self { + #[cfg(not(debug_assertions))] + ArchiveReaderStatus::None(_) => {} + #[cfg(debug_assertions)] ArchiveReaderStatus::StackTop { poisoned: x, .. } => *x = true, + #[cfg(debug_assertions)] ArchiveReaderStatus::StackChild { poisoned: x, .. } => **x = true, } } @@ -399,10 +388,14 @@ impl ArchiveReaderStatus<'_> { /// Mark the parent as ready, allowing it to be used again and preventing this reference to the reader being used again. fn ready_parent(&mut self) { match self { - Self::StackTop { ready, .. } => { + #[cfg(not(debug_assertions))] + ArchiveReaderStatus::None(_) => {} + #[cfg(debug_assertions)] + ArchiveReaderStatus::StackTop { ready, .. } => { *ready = false; } - Self::StackChild { + #[cfg(debug_assertions)] + ArchiveReaderStatus::StackChild { ready, parent_ready, .. @@ -415,15 +408,23 @@ impl ArchiveReaderStatus<'_> { fn poisoned(&self) -> bool { match self { - Self::StackTop { poisoned, .. } => *poisoned, - Self::StackChild { poisoned, .. } => **poisoned, + #[cfg(not(debug_assertions))] + ArchiveReaderStatus::None(_) => false, + #[cfg(debug_assertions)] + ArchiveReaderStatus::StackTop { poisoned, .. } => *poisoned, + #[cfg(debug_assertions)] + ArchiveReaderStatus::StackChild { poisoned, .. } => **poisoned, } } fn ready(&self) -> bool { match self { - Self::StackTop { ready, .. } => *ready, - Self::StackChild { ready, .. } => *ready, + #[cfg(not(debug_assertions))] + ArchiveReaderStatus::None(_) => true, + #[cfg(debug_assertions)] + ArchiveReaderStatus::StackTop { ready, .. } => *ready, + #[cfg(debug_assertions)] + ArchiveReaderStatus::StackChild { ready, .. } => *ready, } } } @@ -434,6 +435,8 @@ impl<'a, 'r> ArchiveReader<'a, 'r> { fn child(&mut self) -> ArchiveReader<'_, 'r> { ArchiveReader { inner: self.inner, + #[cfg(not(debug_assertions))] + status: ArchiveReaderStatus::None(PhantomData), #[cfg(debug_assertions)] status: match &mut self.status { ArchiveReaderStatus::StackTop { poisoned, ready } => { @@ -462,16 +465,13 @@ impl<'a, 'r> ArchiveReader<'a, 'r> { /// Only does anything when debug assertions are on. 
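// An illustrative debug-build sequence for the status protocol above (a
// sketch, not from this patch):
//
//   let node = nar::reader::open(&mut r)?;  // StackTop { poisoned: false, ready: true }
//   // reader.child() => StackChild { ready: true, parent_ready: &mut false, .. }
//   // the child finishes its node and calls ready_parent():
//   //   child ready = false, parent ready = true again
//
// Any use of a reader whose `ready` flag is false, or whose shared `poisoned`
// flag was set by an earlier error, trips check_correct below.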
#[inline(always)] fn check_correct(&self) { - #[cfg(debug_assertions)] - { - debug_assert!( - !self.status.poisoned(), - "Archive reader used after it was meant to be abandoned!" - ); - debug_assert!( - self.status.ready(), - "Non-ready archive reader used! (Should've been reading from something else)" - ) - } + assert!( + !self.status.poisoned(), + "Archive reader used after it was meant to be abandoned!" + ); + assert!( + self.status.ready(), + "Non-ready archive reader used! (Should've been reading from something else)" + ); } } diff --git a/tvix/nix-compat/src/nar/reader/read.rs b/tvix/nix-compat/src/nar/reader/read.rs index 1ce1613764..9938581f2a 100644 --- a/tvix/nix-compat/src/nar/reader/read.rs +++ b/tvix/nix-compat/src/nar/reader/read.rs @@ -15,6 +15,38 @@ pub fn u64(reader: &mut Reader) -> io::Result<u64> { Ok(u64::from_le_bytes(buf)) } +/// Consume a byte string from the reader into a provided buffer, +/// returning the data bytes. +pub fn bytes_buf<'a, const N: usize>( + reader: &mut Reader, + buf: &'a mut [u8; N], + max_len: usize, +) -> io::Result<&'a [u8]> { + assert_eq!(N % 8, 0); + assert!(max_len <= N); + + // read the length, and reject excessively large values + let len = self::u64(reader)?; + if len > max_len as u64 { + return Err(InvalidData.into()); + } + // we know the length fits in a usize now + let len = len as usize; + + // read the data and padding into a buffer + let buf_len = (len + 7) & !7; + reader.read_exact(&mut buf[..buf_len])?; + + // verify that the padding is all zeroes + for &b in &buf[len..buf_len] { + if b != 0 { + return Err(InvalidData.into()); + } + } + + Ok(&buf[..len]) +} + /// Consume a byte string of up to `max_len` bytes from the reader. pub fn bytes(reader: &mut Reader, max_len: usize) -> io::Result<Vec<u8>> { assert!(max_len <= isize::MAX as usize); diff --git a/tvix/nix-compat/src/nar/reader/test.rs b/tvix/nix-compat/src/nar/reader/test.rs index 02dc4767c9..63e4fb289f 100644 --- a/tvix/nix-compat/src/nar/reader/test.rs +++ b/tvix/nix-compat/src/nar/reader/test.rs @@ -71,7 +71,7 @@ fn complicated() { .expect("next must be some") .expect("must be some"); - assert_eq!(&b"keep"[..], &entry.name); + assert_eq!(b"keep", entry.name); match entry.node { nar::reader::Node::Directory(mut subdir_reader) => { @@ -117,7 +117,7 @@ fn file_read_abandoned() { .expect("next must succeed") .expect("must be some"); - assert_eq!(&b".keep"[..], &entry.name); + assert_eq!(b".keep", entry.name); // don't bother to finish reading it. 
};

@@ -162,7 +162,7 @@ fn dir_read_abandoned() {
                    .expect("next must be some")
                    .expect("must be some");

-                assert_eq!(&b"keep"[..], &entry.name);
+                assert_eq!(b"keep", entry.name);

                match entry.node {
                    nar::reader::Node::Directory(_) => {
@@ -213,7 +213,7 @@ fn dir_read_after_none() {
                    .expect("next must be some")
                    .expect("must be some");

-                assert_eq!(&b"keep"[..], &entry.name);
+                assert_eq!(b"keep", entry.name);

                match entry.node {
                    nar::reader::Node::Directory(mut subdir_reader) => {
@@ -248,7 +248,7 @@ fn dir_read_after_none() {
}

fn must_read_file(name: &'static str, entry: nar::reader::Entry<'_, '_>) {
-    assert_eq!(name.as_bytes(), &entry.name);
+    assert_eq!(name.as_bytes(), entry.name);

    match entry.node {
        nar::reader::Node::File {
@@ -267,7 +267,7 @@ fn must_be_symlink(
    exp_target: &'static str,
    entry: nar::reader::Entry<'_, '_>,
) {
-    assert_eq!(name.as_bytes(), &entry.name);
+    assert_eq!(name.as_bytes(), entry.name);

    match entry.node {
        nar::reader::Node::Symlink { target } => {
diff --git a/tvix/nix-compat/src/nar/wire/mod.rs b/tvix/nix-compat/src/nar/wire/mod.rs
index b9e0212495..9e99b530ce 100644
--- a/tvix/nix-compat/src/nar/wire/mod.rs
+++ b/tvix/nix-compat/src/nar/wire/mod.rs
@@ -90,6 +90,23 @@ pub const TOK_DIR: [u8; 24] = *b"\x09\0\0\0\0\0\0\0directory\0\0\0\0\0\0\0";
 pub const TOK_ENT: [u8; 48] = *b"\x05\0\0\0\0\0\0\0entry\0\0\0\x01\0\0\0\0\0\0\0(\0\0\0\0\0\0\0\x04\0\0\0\0\0\0\0name\0\0\0\0";
 pub const TOK_NOD: [u8; 48] = *b"\x04\0\0\0\0\0\0\0node\0\0\0\0\x01\0\0\0\0\0\0\0(\0\0\0\0\0\0\0\x04\0\0\0\0\0\0\0type\0\0\0\0";
 pub const TOK_PAR: [u8; 16] = *b"\x01\0\0\0\0\0\0\0)\0\0\0\0\0\0\0";
+#[cfg(feature = "async")]
+const TOK_PAD_PAR: [u8; 24] = *b"\0\0\0\0\0\0\0\0\x01\0\0\0\0\0\0\0)\0\0\0\0\0\0\0";
+
+#[cfg(feature = "async")]
+#[derive(Debug)]
+pub(crate) enum PadPar {}
+
+#[cfg(feature = "async")]
+impl crate::wire::reader::Tag for PadPar {
+    const PATTERN: &'static [u8] = &TOK_PAD_PAR;
+
+    type Buf = [u8; 24];
+
+    fn make_buf() -> Self::Buf {
+        [0; 24]
+    }
+}

 #[test]
 fn tokens() {
diff --git a/tvix/nix-compat/src/nix_daemon/worker_protocol.rs b/tvix/nix-compat/src/nix_daemon/worker_protocol.rs
index 58a48d1bdd..7e3adc0db2 100644
--- a/tvix/nix-compat/src/nix_daemon/worker_protocol.rs
+++ b/tvix/nix-compat/src/nix_daemon/worker_protocol.rs
@@ -15,13 +15,34 @@ static WORKER_MAGIC_1: u64 = 0x6e697863; // "nixc"
 static WORKER_MAGIC_2: u64 = 0x6478696f; // "dxio"
 pub static STDERR_LAST: u64 = 0x616c7473; // "alts"

+/// | Nix version     | Protocol |
+/// |-----------------|----------|
+/// | 0.11            | 1.02     |
+/// | 0.12            | 1.04     |
+/// | 0.13            | 1.05     |
+/// | 0.14            | 1.05     |
+/// | 0.15            | 1.05     |
+/// | 0.16            | 1.06     |
+/// | 1.0             | 1.10     |
+/// | 1.1             | 1.11     |
+/// | 1.2             | 1.12     |
+/// | 1.3 - 1.5.3     | 1.13     |
+/// | 1.6 - 1.10      | 1.14     |
+/// | 1.11 - 1.11.16  | 1.15     |
+/// | 2.0 - 2.0.4     | 1.20     |
+/// | 2.1 - 2.3.18    | 1.21     |
+/// | 2.4 - 2.6.1     | 1.32     |
+/// | 2.7.0           | 1.33     |
+/// | 2.8.0 - 2.14.1  | 1.34     |
+/// | 2.15.0 - 2.19.4 | 1.35     |
+/// | 2.20.0 - 2.22.0 | 1.37     |
 static PROTOCOL_VERSION: ProtocolVersion = ProtocolVersion::from_parts(1, 37);

 /// Max length of a Nix setting name/value. In bytes.
 ///
 /// This value has been arbitrarily chosen after looking at the nix.conf
 /// manpage. Don't hesitate to increase it if it's too limiting.
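// An illustrative framing example (not from this patch): each settings
// override read below arrives as two "bytes wire packets", name then value,
// with MAX_SETTING_SIZE capping the length field of each. The name "cores"
// would be framed as:
//
//   05 00 00 00 00 00 00 00   length = 5 (u64, little endian)
//   63 6f 72 65 73 00 00 00   "cores" + 3 bytes of zero padding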
-pub static MAX_SETTING_SIZE: u64 = 1024; +pub static MAX_SETTING_SIZE: usize = 1024; /// Worker Operation /// @@ -131,30 +152,30 @@ pub async fn read_client_settings<R: AsyncReadExt + Unpin>( r: &mut R, client_version: ProtocolVersion, ) -> std::io::Result<ClientSettings> { - let keep_failed = wire::read_bool(r).await?; - let keep_going = wire::read_bool(r).await?; - let try_fallback = wire::read_bool(r).await?; - let verbosity_uint = wire::read_u64(r).await?; + let keep_failed = r.read_u64_le().await? != 0; + let keep_going = r.read_u64_le().await? != 0; + let try_fallback = r.read_u64_le().await? != 0; + let verbosity_uint = r.read_u64_le().await?; let verbosity = Verbosity::from_u64(verbosity_uint).ok_or_else(|| { Error::new( ErrorKind::InvalidData, format!("Can't convert integer {} to verbosity", verbosity_uint), ) })?; - let max_build_jobs = wire::read_u64(r).await?; - let max_silent_time = wire::read_u64(r).await?; - _ = wire::read_u64(r).await?; // obsolete useBuildHook - let verbose_build = wire::read_bool(r).await?; - _ = wire::read_u64(r).await?; // obsolete logType - _ = wire::read_u64(r).await?; // obsolete printBuildTrace - let build_cores = wire::read_u64(r).await?; - let use_substitutes = wire::read_bool(r).await?; + let max_build_jobs = r.read_u64_le().await?; + let max_silent_time = r.read_u64_le().await?; + _ = r.read_u64_le().await?; // obsolete useBuildHook + let verbose_build = r.read_u64_le().await? != 0; + _ = r.read_u64_le().await?; // obsolete logType + _ = r.read_u64_le().await?; // obsolete printBuildTrace + let build_cores = r.read_u64_le().await?; + let use_substitutes = r.read_u64_le().await? != 0; let mut overrides = HashMap::new(); if client_version.minor() >= 12 { - let num_overrides = wire::read_u64(r).await?; + let num_overrides = r.read_u64_le().await?; for _ in 0..num_overrides { - let name = wire::read_string(r, 0..MAX_SETTING_SIZE).await?; - let value = wire::read_string(r, 0..MAX_SETTING_SIZE).await?; + let name = wire::read_string(r, 0..=MAX_SETTING_SIZE).await?; + let value = wire::read_string(r, 0..=MAX_SETTING_SIZE).await?; overrides.insert(name, value); } } @@ -197,17 +218,17 @@ pub async fn server_handshake_client<'a, RW: 'a>( where &'a mut RW: AsyncReadExt + AsyncWriteExt + Unpin, { - let worker_magic_1 = wire::read_u64(&mut conn).await?; + let worker_magic_1 = conn.read_u64_le().await?; if worker_magic_1 != WORKER_MAGIC_1 { Err(std::io::Error::new( ErrorKind::InvalidData, format!("Incorrect worker magic number received: {}", worker_magic_1), )) } else { - wire::write_u64(&mut conn, WORKER_MAGIC_2).await?; - wire::write_u64(&mut conn, PROTOCOL_VERSION.into()).await?; + conn.write_u64_le(WORKER_MAGIC_2).await?; + conn.write_u64_le(PROTOCOL_VERSION.into()).await?; conn.flush().await?; - let client_version = wire::read_u64(&mut conn).await?; + let client_version = conn.read_u64_le().await?; // Parse into ProtocolVersion. let client_version: ProtocolVersion = client_version .try_into() @@ -220,14 +241,14 @@ where } if client_version.minor() >= 14 { // Obsolete CPU affinity. - let read_affinity = wire::read_u64(&mut conn).await?; + let read_affinity = conn.read_u64_le().await?; if read_affinity != 0 { - let _cpu_affinity = wire::read_u64(&mut conn).await?; + let _cpu_affinity = conn.read_u64_le().await?; }; } if client_version.minor() >= 11 { // Obsolete reserveSpace - let _reserve_space = wire::read_u64(&mut conn).await?; + let _reserve_space = conn.read_u64_le().await?; } if client_version.minor() >= 33 { // Nix version. 
We're plain lying, we're not Nix, but eh… @@ -245,7 +266,7 @@ where /// Read a worker [Operation] from the wire. pub async fn read_op<R: AsyncReadExt + Unpin>(r: &mut R) -> std::io::Result<Operation> { - let op_number = wire::read_u64(r).await?; + let op_number = r.read_u64_le().await?; Operation::from_u64(op_number).ok_or(Error::new( ErrorKind::InvalidData, format!("Invalid OP number {}", op_number), @@ -278,8 +299,8 @@ where W: AsyncReadExt + AsyncWriteExt + Unpin, { match t { - Trust::Trusted => wire::write_u64(conn, 1).await, - Trust::NotTrusted => wire::write_u64(conn, 2).await, + Trust::Trusted => conn.write_u64_le(1).await, + Trust::NotTrusted => conn.write_u64_le(2).await, } } diff --git a/tvix/nix-compat/src/store_path/mod.rs b/tvix/nix-compat/src/store_path/mod.rs index ac9f1805e3..707c41a92d 100644 --- a/tvix/nix-compat/src/store_path/mod.rs +++ b/tvix/nix-compat/src/store_path/mod.rs @@ -56,7 +56,7 @@ pub enum Error { #[derive(Clone, Debug, PartialEq, Eq, Hash)] pub struct StorePath { digest: [u8; DIGEST_SIZE], - name: String, + name: Box<str>, } impl StorePath { @@ -65,7 +65,7 @@ impl StorePath { } pub fn name(&self) -> &str { - self.name.as_ref() + &self.name } pub fn as_ref(&self) -> StorePathRef<'_> { @@ -176,10 +176,7 @@ pub struct StorePathRef<'a> { impl<'a> From<&'a StorePath> for StorePathRef<'a> { fn from(&StorePath { digest, ref name }: &'a StorePath) -> Self { - StorePathRef { - digest, - name: name.as_ref(), - } + StorePathRef { digest, name } } } @@ -209,7 +206,7 @@ impl<'a> StorePathRef<'a> { pub fn to_owned(&self) -> StorePath { StorePath { digest: self.digest, - name: self.name.to_owned(), + name: self.name.into(), } } @@ -303,8 +300,7 @@ impl Serialize for StorePathRef<'_> { } } -/// NAME_CHARS contains `true` for bytes that are valid in store path names, -/// not accounting for '.' being permitted only past the first character. +/// NAME_CHARS contains `true` for bytes that are valid in store path names. static NAME_CHARS: [bool; 256] = { let mut tbl = [false; 256]; let mut c = 0; @@ -332,10 +328,6 @@ pub(crate) fn validate_name(s: &(impl AsRef<[u8]> + ?Sized)) -> Result<&str, Err return Err(Error::InvalidLength); } - if s[0] == b'.' { - return Err(Error::InvalidName(s.to_vec(), 0)); - } - let mut valid = true; for &c in s { valid = valid && NAME_CHARS[c as usize]; @@ -399,7 +391,7 @@ mod tests { let expected_digest: [u8; DIGEST_SIZE] = hex!("8a12321522fd91efbd60ebb2481af88580f61600"); - assert_eq!("net-tools-1.60_p20170221182432", nixpath.name); + assert_eq!("net-tools-1.60_p20170221182432", nixpath.name()); assert_eq!(nixpath.digest, expected_digest); assert_eq!(example_nix_path_str, nixpath.to_string()) @@ -446,15 +438,18 @@ mod tests { } } - /// This is the store path rejected when `nix-store --add`'ing an + /// This is the store path *accepted* when `nix-store --add`'ing an /// empty `.gitignore` file. /// - /// Nix 2.4 accidentally dropped this behaviour, but this is considered a bug. - /// See https://github.com/NixOS/nix/pull/9095. + /// Nix 2.4 accidentally permitted this behaviour, but the revert came + /// too late to beat Hyrum's law. It is now considered permissible. 
+ /// + /// https://github.com/NixOS/nix/pull/9095 (revert) + /// https://github.com/NixOS/nix/pull/9867 (revert-of-revert) #[test] fn starts_with_dot() { StorePath::from_bytes(b"fli4bwscgna7lpm7v5xgnjxrxh0yc7ra-.gitignore") - .expect_err("must fail"); + .expect("must succeed"); } #[test] diff --git a/tvix/nix-compat/src/wire/bytes/mod.rs b/tvix/nix-compat/src/wire/bytes/mod.rs index 0c637e6c39..2ed071e379 100644 --- a/tvix/nix-compat/src/wire/bytes/mod.rs +++ b/tvix/nix-compat/src/wire/bytes/mod.rs @@ -1,23 +1,21 @@ use std::{ io::{Error, ErrorKind}, - ops::RangeBounds, + mem::MaybeUninit, + ops::RangeInclusive, }; -use tokio::io::{AsyncReadExt, AsyncWriteExt}; +use tokio::io::{self, AsyncReadExt, AsyncWriteExt, ReadBuf}; -mod reader; +pub(crate) mod reader; pub use reader::BytesReader; mod writer; pub use writer::BytesWriter; -use super::primitive; - /// 8 null bytes, used to write out padding. const EMPTY_BYTES: &[u8; 8] = &[0u8; 8]; /// The length of the size field, in bytes is always 8. const LEN_SIZE: usize = 8; -#[allow(dead_code)] /// Read a "bytes wire packet" from the AsyncRead. /// Rejects reading more than `allowed_size` bytes of payload. /// @@ -35,24 +33,29 @@ const LEN_SIZE: usize = 8; /// /// This buffers the entire payload into memory, /// a streaming version is available at [crate::wire::bytes::BytesReader]. -pub async fn read_bytes<R, S>(r: &mut R, allowed_size: S) -> std::io::Result<Vec<u8>> +pub async fn read_bytes<R: ?Sized>( + r: &mut R, + allowed_size: RangeInclusive<usize>, +) -> io::Result<Vec<u8>> where R: AsyncReadExt + Unpin, - S: RangeBounds<u64>, { // read the length field - let len = primitive::read_u64(r).await?; - - if !allowed_size.contains(&len) { - return Err(std::io::Error::new( - std::io::ErrorKind::InvalidData, - "signalled package size not in allowed range", - )); - } + let len = r.read_u64_le().await?; + let len: usize = len + .try_into() + .ok() + .filter(|len| allowed_size.contains(len)) + .ok_or_else(|| { + io::Error::new( + io::ErrorKind::InvalidData, + "signalled package size not in allowed range", + ) + })?; // calculate the total length, including padding. // byte packets are padded to 8 byte blocks each. - let padded_len = padding_len(len) as u64 + (len as u64); + let padded_len = padding_len(len as u64) as u64 + (len as u64); let mut limited_reader = r.take(padded_len); let mut buf = Vec::new(); @@ -61,34 +64,87 @@ where // make sure we got exactly the number of bytes, and not less. if s as u64 != padded_len { - return Err(std::io::Error::new( - std::io::ErrorKind::InvalidData, - "got less bytes than expected", - )); + return Err(io::ErrorKind::UnexpectedEof.into()); } - let (_content, padding) = buf.split_at(len as usize); + let (_content, padding) = buf.split_at(len); // ensure the padding is all zeroes. 
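// For illustration (not from this patch): for the payload b"foo", len = 3 and
// padded_len = 8, so the wire packet is
//
//   03 00 00 00 00 00 00 00   length field
//   66 6f 6f 00 00 00 00 00   "foo" + 5 bytes of padding
//
// and the check below inspects buf[3..8], rejecting any nonzero byte there.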
-    if !padding.iter().all(|e| *e == b'\0') {
-        return Err(std::io::Error::new(
-            std::io::ErrorKind::InvalidData,
+    if padding.iter().any(|&b| b != 0) {
+        return Err(io::Error::new(
+            io::ErrorKind::InvalidData,
             "padding is not all zeroes",
         ));
     }

     // return the data without the padding
-    buf.truncate(len as usize);
+    buf.truncate(len);
     Ok(buf)
 }

+pub(crate) async fn read_bytes_buf<'a, const N: usize, R: ?Sized>(
+    reader: &mut R,
+    buf: &'a mut [MaybeUninit<u8>; N],
+    allowed_size: RangeInclusive<usize>,
+) -> io::Result<&'a [u8]>
+where
+    R: AsyncReadExt + Unpin,
+{
+    assert_eq!(N % 8, 0);
+    assert!(*allowed_size.end() <= N);
+
+    let len = reader.read_u64_le().await?;
+    let len: usize = len
+        .try_into()
+        .ok()
+        .filter(|len| allowed_size.contains(len))
+        .ok_or_else(|| {
+            io::Error::new(
+                io::ErrorKind::InvalidData,
+                "signalled package size not in allowed range",
+            )
+        })?;
+
+    let buf_len = (len + 7) & !7;
+    let buf = {
+        let mut read_buf = ReadBuf::uninit(&mut buf[..buf_len]);
+
+        while read_buf.filled().len() < buf_len {
+            reader.read_buf(&mut read_buf).await?;
+        }
+
+        // ReadBuf::filled does not pass the underlying buffer's lifetime through,
+        // so we must make a trip to hell.
+        //
+        // SAFETY: `read_buf` is filled up to `buf_len`, and we verify that it is
+        // still pointing at the same underlying buffer.
+        unsafe {
+            assert_eq!(read_buf.filled().as_ptr(), buf.as_ptr() as *const u8);
+            assume_init_bytes(&buf[..buf_len])
+        }
+    };
+
+    if buf[len..buf_len].iter().any(|&b| b != 0) {
+        return Err(io::Error::new(
+            io::ErrorKind::InvalidData,
+            "padding is not all zeroes",
+        ));
+    }
+
+    Ok(&buf[..len])
+}
+
+/// SAFETY: The bytes have to actually be initialized.
+unsafe fn assume_init_bytes(slice: &[MaybeUninit<u8>]) -> &[u8] {
+    &*(slice as *const [MaybeUninit<u8>] as *const [u8])
+}
+
 /// Read a "bytes wire packet" from the AsyncRead and tries to parse it as a string.
 /// Internally uses [read_bytes].
 /// Rejects reading more than `allowed_size` bytes of payload.
-pub async fn read_string<R, S>(r: &mut R, allowed_size: S) -> std::io::Result<String>
+pub async fn read_string<R>(r: &mut R, allowed_size: RangeInclusive<usize>) -> io::Result<String>
 where
     R: AsyncReadExt + Unpin,
-    S: RangeBounds<u64>,
 {
     let bytes = read_bytes(r, allowed_size).await?;
     String::from_utf8(bytes).map_err(|e| Error::new(ErrorKind::InvalidData, e))
@@ -106,9 +162,9 @@ where
 pub async fn write_bytes<W: AsyncWriteExt + Unpin, B: AsRef<[u8]>>(
     w: &mut W,
     b: B,
-) -> std::io::Result<()> {
+) -> io::Result<()> {
     // write the size packet.
-    primitive::write_u64(w, b.as_ref().len() as u64).await?;
+    w.write_u64_le(b.as_ref().len() as u64).await?;

     // write the payload
     w.write_all(b.as_ref()).await?;
@@ -122,14 +178,10 @@
 }

 /// Computes the number of bytes we should add to len (a length in
-/// bytes) to be alined on 64 bits (8 bytes).
+/// bytes) to be aligned on 64 bits (8 bytes).
 fn padding_len(len: u64) -> u8 {
-    let modulo = len % 8;
-    if modulo == 0 {
-        0
-    } else {
-        8 - modulo as u8
-    }
+    let aligned = len.wrapping_add(7) & !7;
+    aligned.wrapping_sub(len) as u8
 }

 #[cfg(test)]
@@ -141,7 +193,7 @@ mod tests {

     /// The maximum length of bytes packets we're willing to accept in the test
     /// cases.
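// Worked examples for the branch-free padding_len above (a sketch, not from
// this patch): len = 3 gives aligned = (3 + 7) & !7 = 8 and padding 5;
// len = 8 gives aligned = 8 and padding 0; len = u64::MAX wraps to
// aligned = 0, and 0u64.wrapping_sub(u64::MAX) = 1, the case pinned down by
// the padding_len_u64_max test below.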
- const MAX_LEN: u64 = 1024; + const MAX_LEN: usize = 1024; #[tokio::test] async fn test_read_8_bytes() { @@ -152,10 +204,7 @@ mod tests { assert_eq!( &12345678u64.to_le_bytes(), - read_bytes(&mut mock, 0u64..MAX_LEN) - .await - .unwrap() - .as_slice() + read_bytes(&mut mock, 0..=MAX_LEN).await.unwrap().as_slice() ); } @@ -168,10 +217,7 @@ mod tests { assert_eq!( hex!("010203040506070809"), - read_bytes(&mut mock, 0u64..MAX_LEN) - .await - .unwrap() - .as_slice() + read_bytes(&mut mock, 0..=MAX_LEN).await.unwrap().as_slice() ); } @@ -183,10 +229,7 @@ mod tests { assert_eq!( hex!(""), - read_bytes(&mut mock, 0u64..MAX_LEN) - .await - .unwrap() - .as_slice() + read_bytes(&mut mock, 0..=MAX_LEN).await.unwrap().as_slice() ); } @@ -196,7 +239,7 @@ mod tests { async fn test_read_reject_too_large() { let mut mock = Builder::new().read(&100u64.to_le_bytes()).build(); - read_bytes(&mut mock, 10..10) + read_bytes(&mut mock, 10..=10) .await .expect_err("expect this to fail"); } @@ -232,4 +275,9 @@ mod tests { .build(); assert_ok!(write_bytes(&mut mock, &input).await) } + + #[test] + fn padding_len_u64_max() { + assert_eq!(padding_len(u64::MAX), 1); + } } diff --git a/tvix/nix-compat/src/wire/bytes/reader/mod.rs b/tvix/nix-compat/src/wire/bytes/reader/mod.rs index 78615faf0f..6bd376c06f 100644 --- a/tvix/nix-compat/src/wire/bytes/reader/mod.rs +++ b/tvix/nix-compat/src/wire/bytes/reader/mod.rs @@ -1,12 +1,18 @@ use std::{ + future::Future, io, - ops::{Bound, RangeBounds}, + num::NonZeroU64, + ops::RangeBounds, pin::Pin, task::{self, ready, Poll}, }; -use tokio::io::{AsyncRead, ReadBuf}; +use tokio::io::{AsyncBufRead, AsyncRead, AsyncReadExt, ReadBuf}; -use trailer::TrailerReader; +use trailer::{read_trailer, ReadTrailer, Trailer}; + +#[doc(hidden)] +pub use self::trailer::Pad; +pub(crate) use self::trailer::Tag; mod trailer; /// Reads a "bytes wire packet" from the underlying reader. @@ -14,40 +20,46 @@ mod trailer; /// however this structure provides a [AsyncRead] interface, /// allowing to not having to pass around the entire payload in memory. /// -/// After being constructed with the underlying reader and an allowed size, -/// subsequent requests to poll_read will return payload data until the end -/// of the packet is reached. -/// -/// Internally, it will first read over the size packet, filling payload_size, -/// ensuring it fits allowed_size, then return payload data. +/// It is constructed by reading a size with [BytesReader::new], +/// and yields payload data until the end of the packet is reached. /// /// It will not return the final bytes before all padding has been successfully /// consumed as well, but the full length of the reader must be consumed. /// -/// In case of an error due to size constraints, or in case of not reading -/// all the way to the end (and getting a EOF), the underlying reader is no -/// longer usable and might return garbage. -pub struct BytesReader<R> { - state: State<R>, +/// If the data is not read all the way to the end, or an error is encountered, +/// the underlying reader is no longer usable and might return garbage. +#[derive(Debug)] +#[allow(private_bounds)] +pub struct BytesReader<R, T: Tag = Pad> { + state: State<R, T>, +} + +/// Split the `user_len` into `body_len` and `tail_len`, which are respectively +/// the non-terminal 8-byte blocks, and the ≤8 bytes of user data contained in +/// the trailer block. 
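// Worked examples, assuming the definition that follows: user_len = 1 yields
// (body_len: 0, tail_len: 1); user_len = 8 yields (0, 8); user_len = 9 yields
// (8, 1). The final 1..=8 bytes of user data always travel inside the trailer
// block, so body_len is a multiple of 8.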
+#[inline(always)]
+fn split_user_len(user_len: NonZeroU64) -> (u64, u8) {
+    let n = user_len.get() - 1;
+    let body_len = n & !7;
+    let tail_len = (n & 7) as u8 + 1;
+    (body_len, tail_len)
+}

 #[derive(Debug)]
-enum State<R> {
-    Size {
-        reader: Option<R>,
-        /// Minimum length (inclusive)
-        user_len_min: u64,
-        /// Maximum length (inclusive)
-        user_len_max: u64,
-        filled: u8,
-        buf: [u8; 8],
-    },
+enum State<R, T: Tag> {
+    /// Full 8-byte blocks are being read and released to the caller.
+    /// NOTE: The final 8-byte block is *always* part of the trailer.
     Body {
         reader: Option<R>,
         consumed: u64,
-        user_len: u64,
+        /// The total length of all user data contained in both the body and trailer.
+        user_len: NonZeroU64,
     },
-    Trailer(TrailerReader<R>),
+    /// The trailer is in the process of being read.
+    ReadTrailer(ReadTrailer<R, T>),
+    /// The trailer has been fully read and validated,
+    /// and data can now be released to the caller.
+    ReleaseTrailer { consumed: u8, data: Trailer },
 }

 impl<R> BytesReader<R>
@@ -55,43 +67,63 @@ where
     R: AsyncRead + Unpin,
 {
     /// Constructs a new BytesReader, using the underlying passed reader.
-    pub fn new<S: RangeBounds<u64>>(reader: R, allowed_size: S) -> Self {
-        let user_len_min = match allowed_size.start_bound() {
-            Bound::Included(&n) => n,
-            Bound::Excluded(&n) => n.saturating_add(1),
-            Bound::Unbounded => 0,
-        };
-
-        let user_len_max = match allowed_size.end_bound() {
-            Bound::Included(&n) => n,
-            Bound::Excluded(&n) => n.checked_sub(1).unwrap(),
-            Bound::Unbounded => u64::MAX,
-        };
-
-        Self {
-            state: State::Size {
-                reader: Some(reader),
-                user_len_min,
-                user_len_max,
-                filled: 0,
-                buf: [0; 8],
-            },
-        }
+    pub async fn new<S: RangeBounds<u64>>(reader: R, allowed_size: S) -> io::Result<Self> {
+        BytesReader::new_internal(reader, allowed_size).await
     }
+}

-    /// Construct a new BytesReader with a known, and already-read size.
-    pub fn with_size(reader: R, size: u64) -> Self {
-        Self {
-            state: State::Body {
-                reader: Some(reader),
-                consumed: 0,
-                user_len: size,
+#[allow(private_bounds)]
+impl<R, T: Tag> BytesReader<R, T>
+where
+    R: AsyncRead + Unpin,
+{
+    /// Constructs a new BytesReader, using the underlying passed reader.
+    pub(crate) async fn new_internal<S: RangeBounds<u64>>(
+        mut reader: R,
+        allowed_size: S,
+    ) -> io::Result<Self> {
+        let size = reader.read_u64_le().await?;
+
+        if !allowed_size.contains(&size) {
+            return Err(io::Error::new(io::ErrorKind::InvalidData, "invalid size"));
+        }
+
+        Ok(Self {
+            state: match NonZeroU64::new(size) {
+                Some(size) => State::Body {
+                    reader: Some(reader),
+                    consumed: 0,
+                    user_len: size,
+                },
+                None => State::ReleaseTrailer {
+                    consumed: 0,
+                    data: read_trailer::<R, T>(reader, 0).await?,
+                },
             },
+        })
+    }
+
+    /// Returns whether there is any remaining data to be read.
+    pub fn is_empty(&self) -> bool {
+        self.len() == 0
+    }
+
+    /// Remaining data length, i.e. not including data already read.
+    pub fn len(&self) -> u64 {
+        match self.state {
+            State::Body {
+                consumed, user_len, ..
+ } => user_len.get() - consumed, + State::ReadTrailer(ref fut) => fut.len() as u64, + State::ReleaseTrailer { consumed, ref data } => data.len() as u64 - consumed as u64, } } } -impl<R: AsyncRead + Unpin> AsyncRead for BytesReader<R> { +#[allow(private_bounds)] +impl<R: AsyncRead + Unpin, T: Tag> AsyncRead for BytesReader<R, T> { fn poll_read( mut self: Pin<&mut Self>, cx: &mut task::Context, @@ -101,66 +133,25 @@ impl<R: AsyncRead + Unpin> AsyncRead for BytesReader<R> { loop { match this { - State::Size { - reader, - user_len_min, - user_len_max, - filled: 8, - buf, - } => { - let reader = reader.take().unwrap(); - - let data_len = u64::from_le_bytes(*buf); - if data_len < *user_len_min || data_len > *user_len_max { - return Err(io::Error::new(io::ErrorKind::InvalidData, "invalid size")) - .into(); - } - - *this = State::Body { - reader: Some(reader), - consumed: 0, - user_len: data_len, - }; - } - State::Size { - reader, - filled, - buf, - .. - } => { - let reader = reader.as_mut().unwrap(); - - let mut read_buf = ReadBuf::new(&mut buf[..]); - read_buf.advance(*filled as usize); - ready!(Pin::new(reader).poll_read(cx, &mut read_buf))?; - - let new_filled = read_buf.filled().len() as u8; - if *filled == new_filled { - return Err(io::ErrorKind::UnexpectedEof.into()).into(); - } - - *filled = new_filled; - } State::Body { reader, consumed, user_len, } => { - let body_len = *user_len & !7; + let (body_len, tail_len) = split_user_len(*user_len); let remaining = body_len - *consumed; let reader = if remaining == 0 { let reader = reader.take().unwrap(); - let user_len = (*user_len & 7) as u8; - *this = State::Trailer(TrailerReader::new(reader, user_len)); + *this = State::ReadTrailer(read_trailer(reader, tail_len)); continue; } else { - reader.as_mut().unwrap() + Pin::new(reader.as_mut().unwrap()) }; let mut bytes_read = 0; ready!(with_limited(buf, remaining, |buf| { - let ret = Pin::new(reader).poll_read(cx, buf); + let ret = reader.poll_read(cx, buf); bytes_read = buf.initialized().len(); ret }))?; @@ -174,14 +165,116 @@ impl<R: AsyncRead + Unpin> AsyncRead for BytesReader<R> { } .into(); } - State::Trailer(reader) => { - return Pin::new(reader).poll_read(cx, buf); + State::ReadTrailer(fut) => { + *this = State::ReleaseTrailer { + consumed: 0, + data: ready!(Pin::new(fut).poll(cx))?, + }; + } + State::ReleaseTrailer { consumed, data } => { + let data = &data[*consumed as usize..]; + let data = &data[..usize::min(data.len(), buf.remaining())]; + + buf.put_slice(data); + *consumed += data.len() as u8; + + return Ok(()).into(); } } } } } +#[allow(private_bounds)] +impl<R: AsyncBufRead + Unpin, T: Tag> AsyncBufRead for BytesReader<R, T> { + fn poll_fill_buf(self: Pin<&mut Self>, cx: &mut task::Context) -> Poll<io::Result<&[u8]>> { + let this = &mut self.get_mut().state; + + loop { + match this { + // This state comes *after* the following case, + // but we can't keep it in logical order because + // that would lengthen the borrow lifetime. 
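// A sketch of the overall state progression, for orientation (not from this
// patch):
//
//   Body { consumed < body_len }   -- release full 8-byte blocks --
//   Body { consumed == body_len }  -> ReadTrailer(tail_len)
//   ReadTrailer                    -- read and validate the trailer block --
//                                  -> ReleaseTrailer { consumed: 0, data }
//   ReleaseTrailer                 -- hand out the final <=8 user bytes --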
+ State::Body { + reader, + consumed, + user_len, + } if { + let (body_len, _) = split_user_len(*user_len); + let remaining = body_len - *consumed; + + remaining == 0 + } => + { + let reader = reader.take().unwrap(); + let (_, tail_len) = split_user_len(*user_len); + + *this = State::ReadTrailer(read_trailer(reader, tail_len)); + } + State::Body { + reader, + consumed, + user_len, + } => { + let (body_len, _) = split_user_len(*user_len); + let remaining = body_len - *consumed; + + let reader = Pin::new(reader.as_mut().unwrap()); + + match ready!(reader.poll_fill_buf(cx))? { + &[] => { + return Err(io::ErrorKind::UnexpectedEof.into()).into(); + } + mut buf => { + if buf.len() as u64 > remaining { + buf = &buf[..remaining as usize]; + } + + return Ok(buf).into(); + } + } + } + State::ReadTrailer(fut) => { + *this = State::ReleaseTrailer { + consumed: 0, + data: ready!(Pin::new(fut).poll(cx))?, + }; + } + State::ReleaseTrailer { consumed, data } => { + return Ok(&data[*consumed as usize..]).into(); + } + } + } + } + + fn consume(mut self: Pin<&mut Self>, amt: usize) { + match &mut self.state { + State::Body { + reader, + consumed, + user_len, + } => { + let reader = Pin::new(reader.as_mut().unwrap()); + let (body_len, _) = split_user_len(*user_len); + + *consumed = consumed + .checked_add(amt as u64) + .filter(|&consumed| consumed <= body_len) + .expect("consumed out of bounds"); + + reader.consume(amt); + } + State::ReadTrailer(_) => unreachable!(), + State::ReleaseTrailer { consumed, data } => { + *consumed = amt + .checked_add(*consumed as usize) + .filter(|&consumed| consumed <= data.len()) + .expect("consumed out of bounds") as u8; + } + } + } +} + /// Make a limited version of `buf`, consisting only of up to `n` bytes of the unfilled section, and call `f` with it. /// After `f` returns, we propagate the filled cursor advancement back to `buf`. fn with_limited<R>(buf: &mut ReadBuf, n: u64, f: impl FnOnce(&mut ReadBuf) -> R) -> R { @@ -214,8 +307,8 @@ mod tests { use hex_literal::hex; use lazy_static::lazy_static; use rstest::rstest; - use tokio::io::AsyncReadExt; - use tokio_test::{assert_err, io::Builder}; + use tokio::io::{AsyncReadExt, BufReader}; + use tokio_test::io::Builder; use super::*; @@ -249,14 +342,16 @@ mod tests { .read(&produce_packet_bytes(payload).await) .build(); - let mut r = BytesReader::new(&mut mock, ..=LARGE_PAYLOAD.len() as u64); + let mut r = BytesReader::new(&mut mock, ..=LARGE_PAYLOAD.len() as u64) + .await + .unwrap(); let mut buf = Vec::new(); r.read_to_end(&mut buf).await.expect("must succeed"); assert_eq!(payload, &buf[..]); } - /// Read bytes packets of various length, and ensure read_to_end returns the + /// Read bytes packets of various length, and ensure copy_buf reads the /// expected payload. 
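// A minimal usage sketch of the API exercised by these tests (hypothetical
// caller, assuming `conn` is some AsyncBufRead + Unpin transport):
//
//   let mut r = BytesReader::new(&mut conn, ..=1024u64).await?;  // size is read + checked here
//   let mut payload = Vec::with_capacity(r.len() as usize);
//   tokio::io::copy_buf(&mut r, &mut payload).await?;            // drains body, trailer, padding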
#[rstest] #[case::empty(&[])] // empty bytes packet @@ -265,20 +360,21 @@ mod tests { #[case::size_9b(&hex!("000102030405060708"))] // 9 bytes payload (7 bytes padding) #[case::size_1m(LARGE_PAYLOAD.as_slice())] // larger bytes packet #[tokio::test] - async fn read_payload_correct_known(#[case] payload: &[u8]) { - let packet = produce_packet_bytes(payload).await; - - let size = u64::from_le_bytes({ - let mut buf = [0; 8]; - buf.copy_from_slice(&packet[..8]); - buf - }); + async fn read_payload_correct_readbuf(#[case] payload: &[u8]) { + let mut mock = BufReader::new( + Builder::new() + .read(&produce_packet_bytes(payload).await) + .build(), + ); - let mut mock = Builder::new().read(&packet[8..]).build(); + let mut r = BytesReader::new(&mut mock, ..=LARGE_PAYLOAD.len() as u64) + .await + .unwrap(); - let mut r = BytesReader::with_size(&mut mock, size); let mut buf = Vec::new(); - r.read_to_end(&mut buf).await.expect("must succeed"); + tokio::io::copy_buf(&mut r, &mut buf) + .await + .expect("copy_buf must succeed"); assert_eq!(payload, &buf[..]); } @@ -291,9 +387,13 @@ mod tests { .read(&produce_packet_bytes(payload).await[0..8]) // We stop reading after the size packet .build(); - let mut r = BytesReader::new(&mut mock, ..2048); - let mut buf = Vec::new(); - assert_err!(r.read_to_end(&mut buf).await); + assert_eq!( + BytesReader::new(&mut mock, ..2048) + .await + .unwrap_err() + .kind(), + io::ErrorKind::InvalidData + ); } /// Fail if the bytes packet is smaller than allowed @@ -304,9 +404,52 @@ mod tests { .read(&produce_packet_bytes(payload).await[0..8]) // We stop reading after the size packet .build(); - let mut r = BytesReader::new(&mut mock, 1024..2048); - let mut buf = Vec::new(); - assert_err!(r.read_to_end(&mut buf).await); + assert_eq!( + BytesReader::new(&mut mock, 1024..2048) + .await + .unwrap_err() + .kind(), + io::ErrorKind::InvalidData + ); + } + + /// Read the trailer immediately if there is no payload. + #[tokio::test] + async fn read_trailer_immediately() { + use crate::nar::wire::PadPar; + + let mut mock = Builder::new() + .read(&[0; 8]) + .read(&PadPar::PATTERN[8..]) + .build(); + + BytesReader::<_, PadPar>::new_internal(&mut mock, ..) + .await + .unwrap(); + + // The mock reader will panic if dropped without reading all data. + } + + /// Read the trailer even if we only read the exact payload size. + #[tokio::test] + async fn read_exact_trailer() { + use crate::nar::wire::PadPar; + + let mut mock = Builder::new() + .read(&16u64.to_le_bytes()) + .read(&[0x55; 16]) + .read(&PadPar::PATTERN[8..]) + .build(); + + let mut reader = BytesReader::<_, PadPar>::new_internal(&mut mock, ..) + .await + .unwrap(); + + let mut buf = [0; 16]; + reader.read_exact(&mut buf).await.unwrap(); + assert_eq!(buf, [0x55; 16]); + + // The mock reader will panic if dropped without reading all data. 
} /// Fail if the padding is not all zeroes @@ -318,7 +461,7 @@ mod tests { packet_bytes[12] = 0xff; let mut mock = Builder::new().read(&packet_bytes).build(); // We stop reading after the faulty bit - let mut r = BytesReader::new(&mut mock, ..MAX_LEN); + let mut r = BytesReader::new(&mut mock, ..MAX_LEN).await.unwrap(); let mut buf = Vec::new(); r.read_to_end(&mut buf).await.expect_err("must fail"); @@ -335,15 +478,13 @@ mod tests { .read(&produce_packet_bytes(payload).await[..4]) .build(); - let mut r = BytesReader::new(&mut mock, ..MAX_LEN); - let mut buf = [0u8; 1]; - assert_eq!( - r.read_exact(&mut buf).await.expect_err("must fail").kind(), - std::io::ErrorKind::UnexpectedEof + BytesReader::new(&mut mock, ..MAX_LEN) + .await + .expect_err("must fail") + .kind(), + io::ErrorKind::UnexpectedEof ); - - assert_eq!(&[0], &buf, "buffer should stay empty"); } /// Start a 9 bytes payload packet, but have the underlying reader return @@ -357,7 +498,7 @@ mod tests { .read(&produce_packet_bytes(payload).await[..8 + 4]) .build(); - let mut r = BytesReader::new(&mut mock, ..MAX_LEN); + let mut r = BytesReader::new(&mut mock, ..MAX_LEN).await.unwrap(); let mut buf = [0; 9]; r.read_exact(&mut buf[..4]).await.expect("must succeed"); @@ -384,7 +525,7 @@ mod tests { .read(&produce_packet_bytes(payload).await[..offset]) .build(); - let mut r = BytesReader::new(&mut mock, ..MAX_LEN); + let mut r = BytesReader::new(&mut mock, ..MAX_LEN).await.unwrap(); // read_exact of the payload *body* will succeed, but a subsequent read will // return UnexpectedEof error. @@ -411,10 +552,60 @@ mod tests { .read_error(std::io::Error::new(std::io::ErrorKind::Other, "foo")) .build(); - let mut r = BytesReader::new(&mut mock, ..MAX_LEN); - let mut buf = Vec::new(); + // Either length reading or data reading can fail, depending on which test case we're in. + let err: io::Error = async { + let mut r = BytesReader::new(&mut mock, ..MAX_LEN).await?; + let mut buf = Vec::new(); + + r.read_to_end(&mut buf).await?; + + Ok(()) + } + .await + .expect_err("must fail"); + + assert_eq!( + err.kind(), + std::io::ErrorKind::Other, + "error kind must match" + ); + + assert_eq!( + err.into_inner().unwrap().to_string(), + "foo", + "error payload must contain foo" + ); + } + + /// Start a 9 bytes payload packet, but return an error after a certain position. + /// Ensure that error is propagated (AsyncReadBuf case) + #[rstest] + #[case::during_size(4)] + #[case::before_payload(8)] + #[case::during_payload(8 + 4)] + #[case::before_padding(8 + 4)] + #[case::during_padding(8 + 9 + 2)] + #[tokio::test] + async fn propagate_error_from_reader_buffered(#[case] offset: usize) { + let payload = &hex!("FF0102030405060708"); + let mock = Builder::new() + .read(&produce_packet_bytes(payload).await[..offset]) + .read_error(std::io::Error::new(std::io::ErrorKind::Other, "foo")) + .build(); + let mut mock = BufReader::new(mock); + + // Either length reading or data reading can fail, depending on which test case we're in. 
+ let err: io::Error = async { + let mut r = BytesReader::new(&mut mock, ..MAX_LEN).await?; + let mut buf = Vec::new(); + + tokio::io::copy_buf(&mut r, &mut buf).await?; + + Ok(()) + } + .await + .expect_err("must fail"); - let err = r.read_to_end(&mut buf).await.expect_err("must fail"); assert_eq!( err.kind(), std::io::ErrorKind::Other, @@ -438,13 +629,33 @@ mod tests { .read_error(std::io::Error::new(std::io::ErrorKind::Other, "foo")) .build(); - let mut r = BytesReader::new(&mut mock, ..MAX_LEN); + let mut r = BytesReader::new(&mut mock, ..MAX_LEN).await.unwrap(); let mut buf = Vec::new(); r.read_to_end(&mut buf).await.expect("must succeed"); assert_eq!(buf.as_slice(), payload); } + /// If there's an error right after the padding, we don't propagate it, as + /// we're done reading. We just return EOF. + #[tokio::test] + async fn no_error_after_eof_buffered() { + let payload = &hex!("FF0102030405060708"); + let mock = Builder::new() + .read(&produce_packet_bytes(payload).await) + .read_error(std::io::Error::new(std::io::ErrorKind::Other, "foo")) + .build(); + let mut mock = BufReader::new(mock); + + let mut r = BytesReader::new(&mut mock, ..MAX_LEN).await.unwrap(); + let mut buf = Vec::new(); + + tokio::io::copy_buf(&mut r, &mut buf) + .await + .expect("must succeed"); + assert_eq!(buf.as_slice(), payload); + } + /// Introduce various stalls in various places of the packet, to ensure we /// handle these cases properly, too. #[rstest] @@ -462,7 +673,9 @@ mod tests { .read(&produce_packet_bytes(payload).await[offset..]) .build(); - let mut r = BytesReader::new(&mut mock, ..=LARGE_PAYLOAD.len() as u64); + let mut r = BytesReader::new(&mut mock, ..=LARGE_PAYLOAD.len() as u64) + .await + .unwrap(); let mut buf = Vec::new(); r.read_to_end(&mut buf).await.expect("must succeed"); diff --git a/tvix/nix-compat/src/wire/bytes/reader/trailer.rs b/tvix/nix-compat/src/wire/bytes/reader/trailer.rs index 958cead42d..3a5bb75e71 100644 --- a/tvix/nix-compat/src/wire/bytes/reader/trailer.rs +++ b/tvix/nix-compat/src/wire/bytes/reader/trailer.rs @@ -1,4 +1,5 @@ use std::{ + fmt::Debug, future::Future, marker::PhantomData, ops::Deref, @@ -8,11 +9,11 @@ use std::{ use tokio::io::{self, AsyncRead, ReadBuf}; -/// Trailer represents up to 7 bytes of data read as part of the trailer block(s) +/// Trailer represents up to 8 bytes of data read as part of the trailer block(s) #[derive(Debug)] pub(crate) struct Trailer { data_len: u8, - buf: [u8; 7], + buf: [u8; 8], } impl Deref for Trailer { @@ -27,20 +28,20 @@ impl Deref for Trailer { pub(crate) trait Tag { /// The expected suffix /// - /// The first 7 bytes may be ignored, and it must be an 8-byte aligned size. + /// The first 8 bytes may be ignored, and it must be an 8-byte aligned size. const PATTERN: &'static [u8]; /// Suitably sized buffer for reading [Self::PATTERN] /// /// HACK: This is a workaround for const generics limitations. 
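// For reference, a concrete Tag implementation (mirroring the PadPar impl
// added to nar/wire/mod.rs by this same change): the pattern is 8 bytes of
// padding followed by the NAR ")" token, 24 bytes in total.
//
//   enum PadPar {}
//
//   impl Tag for PadPar {
//       const PATTERN: &'static [u8] =
//           b"\0\0\0\0\0\0\0\0\x01\0\0\0\0\0\0\0)\0\0\0\0\0\0\0";
//       type Buf = [u8; 24];
//       fn make_buf() -> Self::Buf { [0; 24] }
//   }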
- type Buf: AsRef<[u8]> + AsMut<[u8]> + Unpin; + type Buf: AsRef<[u8]> + AsMut<[u8]> + Debug + Unpin; /// Make an instance of [Self::Buf] fn make_buf() -> Self::Buf; } #[derive(Debug)] -pub(crate) enum Pad {} +pub enum Pad {} impl Tag for Pad { const PATTERN: &'static [u8] = &[0; 8]; @@ -58,7 +59,7 @@ pub(crate) struct ReadTrailer<R, T: Tag> { data_len: u8, filled: u8, buf: T::Buf, - _phantom: PhantomData<*const T>, + _phantom: PhantomData<fn(T) -> T>, } /// read_trailer returns a [Future] that reads a trailer with a given [Tag] from `reader` @@ -66,7 +67,7 @@ pub(crate) fn read_trailer<R: AsyncRead + Unpin, T: Tag>( reader: R, data_len: u8, ) -> ReadTrailer<R, T> { - assert!(data_len < 8, "payload in trailer must be less than 8 bytes"); + assert!(data_len <= 8, "payload in trailer must be <= 8 bytes"); let buf = T::make_buf(); assert_eq!(buf.as_ref().len(), T::PATTERN.len()); @@ -81,10 +82,16 @@ pub(crate) fn read_trailer<R: AsyncRead + Unpin, T: Tag>( } } +impl<R, T: Tag> ReadTrailer<R, T> { + pub fn len(&self) -> u8 { + self.data_len + } +} + impl<R: AsyncRead + Unpin, T: Tag> Future for ReadTrailer<R, T> { type Output = io::Result<Trailer>; - fn poll(mut self: Pin<&mut Self>, cx: &mut task::Context) -> task::Poll<Self::Output> { + fn poll(mut self: Pin<&mut Self>, cx: &mut task::Context) -> Poll<Self::Output> { let this = &mut *self; loop { @@ -101,8 +108,8 @@ impl<R: AsyncRead + Unpin, T: Tag> Future for ReadTrailer<R, T> { } if this.filled as usize == T::PATTERN.len() { - let mut buf = [0; 7]; - buf.copy_from_slice(&this.buf.as_ref()[..7]); + let mut buf = [0; 8]; + buf.copy_from_slice(&this.buf.as_ref()[..8]); return Ok(Trailer { data_len: this.data_len, @@ -117,10 +124,9 @@ impl<R: AsyncRead + Unpin, T: Tag> Future for ReadTrailer<R, T> { ready!(Pin::new(&mut this.reader).poll_read(cx, &mut buf))?; this.filled = { - let prev_filled = this.filled; let filled = buf.filled().len() as u8; - if filled == prev_filled { + if filled == this.filled { return Err(io::ErrorKind::UnexpectedEof.into()).into(); } @@ -130,61 +136,9 @@ impl<R: AsyncRead + Unpin, T: Tag> Future for ReadTrailer<R, T> { } } -#[derive(Debug)] -pub(crate) enum TrailerReader<R> { - Reading(ReadTrailer<R, Pad>), - Releasing { off: u8, data: Trailer }, - Done, -} - -impl<R: AsyncRead + Unpin> TrailerReader<R> { - pub fn new(reader: R, data_len: u8) -> Self { - Self::Reading(read_trailer(reader, data_len)) - } -} - -impl<R: AsyncRead + Unpin> AsyncRead for TrailerReader<R> { - fn poll_read( - mut self: Pin<&mut Self>, - cx: &mut task::Context, - user_buf: &mut ReadBuf, - ) -> Poll<io::Result<()>> { - let this = &mut *self; - - loop { - match this { - Self::Reading(fut) => { - *this = Self::Releasing { - off: 0, - data: ready!(Pin::new(fut).poll(cx))?, - }; - } - Self::Releasing { off: 8, .. 
} => { - *this = Self::Done; - } - Self::Releasing { off, data } => { - assert_ne!(user_buf.remaining(), 0); - - let buf = &data[*off as usize..]; - let buf = &buf[..usize::min(buf.len(), user_buf.remaining())]; - - user_buf.put_slice(buf); - *off += buf.len() as u8; - - break; - } - Self::Done => break, - } - } - - Ok(()).into() - } -} - #[cfg(test)] mod tests { use std::time::Duration; - use tokio::io::AsyncReadExt; use super::*; @@ -196,11 +150,8 @@ mod tests { .read(&[0xef, 0x00]) .build(); - let mut reader = TrailerReader::new(reader, 2); - - let mut buf = vec![]; assert_eq!( - reader.read_to_end(&mut buf).await.unwrap_err().kind(), + read_trailer::<_, Pad>(reader, 2).await.unwrap_err().kind(), io::ErrorKind::UnexpectedEof ); } @@ -214,11 +165,8 @@ mod tests { .wait(Duration::ZERO) .build(); - let mut reader = TrailerReader::new(reader, 2); - - let mut buf = vec![]; assert_eq!( - reader.read_to_end(&mut buf).await.unwrap_err().kind(), + read_trailer::<_, Pad>(reader, 2).await.unwrap_err().kind(), io::ErrorKind::InvalidData ); } @@ -233,21 +181,17 @@ mod tests { .read(&[0x00, 0x00, 0x00, 0x00, 0x00]) .build(); - let mut reader = TrailerReader::new(reader, 2); - - let mut buf = vec![]; - reader.read_to_end(&mut buf).await.unwrap(); - - assert_eq!(buf, &[0xed, 0xef]); + assert_eq!( + &*read_trailer::<_, Pad>(reader, 2).await.unwrap(), + &[0xed, 0xef] + ); } #[tokio::test] async fn no_padding() { - let reader = tokio_test::io::Builder::new().build(); - let mut reader = TrailerReader::new(reader, 0); - - let mut buf = vec![]; - reader.read_to_end(&mut buf).await.unwrap(); - assert!(buf.is_empty()); + assert!(read_trailer::<_, Pad>(io::empty(), 0) + .await + .unwrap() + .is_empty()); } } diff --git a/tvix/nix-compat/src/wire/mod.rs b/tvix/nix-compat/src/wire/mod.rs index 65c053d58e..a197e3a1f4 100644 --- a/tvix/nix-compat/src/wire/mod.rs +++ b/tvix/nix-compat/src/wire/mod.rs @@ -3,6 +3,3 @@ mod bytes; pub use bytes::*; - -mod primitive; -pub use primitive::*; diff --git a/tvix/nix-compat/src/wire/primitive.rs b/tvix/nix-compat/src/wire/primitive.rs deleted file mode 100644 index ee0f5fc427..0000000000 --- a/tvix/nix-compat/src/wire/primitive.rs +++ /dev/null @@ -1,74 +0,0 @@ -// SPDX-FileCopyrightText: 2023 embr <git@liclac.eu> -// -// SPDX-License-Identifier: EUPL-1.2 - -use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt}; - -#[allow(dead_code)] -/// Read a u64 from the AsyncRead (little endian). -pub async fn read_u64<R: AsyncReadExt + Unpin>(r: &mut R) -> std::io::Result<u64> { - r.read_u64_le().await -} - -/// Write a u64 to the AsyncWrite (little endian). -pub async fn write_u64<W: AsyncWrite + Unpin>(w: &mut W, v: u64) -> std::io::Result<()> { - w.write_u64_le(v).await -} - -#[allow(dead_code)] -/// Read a boolean from the AsyncRead, encoded as u64 (>0 is true). -pub async fn read_bool<R: AsyncRead + Unpin>(r: &mut R) -> std::io::Result<bool> { - Ok(read_u64(r).await? > 0) -} - -#[allow(dead_code)] -/// Write a boolean to the AsyncWrite, encoded as u64 (>0 is true). -pub async fn write_bool<W: AsyncWrite + Unpin>(w: &mut W, v: bool) -> std::io::Result<()> { - write_u64(w, if v { 1u64 } else { 0u64 }).await -} - -#[cfg(test)] -mod tests { - use super::*; - use tokio_test::io::Builder; - - // Integers. 
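Note on this primitive.rs deletion: read_u64/write_u64 (and the bool wrappers) were thin aliases for tokio's own little-endian methods, so callers can use AsyncReadExt/AsyncWriteExt directly. A self-contained sketch of the equivalent calls (tokio only, example values mine):

    use std::io::Cursor;
    use tokio::io::{AsyncReadExt, AsyncWriteExt};

    async fn roundtrip() -> std::io::Result<()> {
        let mut w = Cursor::new(Vec::new());
        w.write_u64_le(1234567890).await?; // was wire::write_u64(&mut w, v)
        let mut r = Cursor::new(w.into_inner());
        assert_eq!(r.read_u64_le().await?, 1234567890); // was wire::read_u64(&mut r)
        Ok(())
    }

Booleans keep the same encoding: a u64, where any value greater than zero reads as true.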
- #[tokio::test] - async fn test_read_u64() { - let mut mock = Builder::new().read(&1234567890u64.to_le_bytes()).build(); - assert_eq!(1234567890u64, read_u64(&mut mock).await.unwrap()); - } - #[tokio::test] - async fn test_write_u64() { - let mut mock = Builder::new().write(&1234567890u64.to_le_bytes()).build(); - write_u64(&mut mock, 1234567890).await.unwrap(); - } - - // Booleans. - #[tokio::test] - async fn test_read_bool_0() { - let mut mock = Builder::new().read(&0u64.to_le_bytes()).build(); - assert!(!read_bool(&mut mock).await.unwrap()); - } - #[tokio::test] - async fn test_read_bool_1() { - let mut mock = Builder::new().read(&1u64.to_le_bytes()).build(); - assert!(read_bool(&mut mock).await.unwrap()); - } - #[tokio::test] - async fn test_read_bool_2() { - let mut mock = Builder::new().read(&2u64.to_le_bytes()).build(); - assert!(read_bool(&mut mock).await.unwrap()); - } - - #[tokio::test] - async fn test_write_bool_false() { - let mut mock = Builder::new().write(&0u64.to_le_bytes()).build(); - write_bool(&mut mock, false).await.unwrap(); - } - #[tokio::test] - async fn test_write_bool_true() { - let mut mock = Builder::new().write(&1u64.to_le_bytes()).build(); - write_bool(&mut mock, true).await.unwrap(); - } -} diff --git a/tvix/shell.nix b/tvix/shell.nix index 422f1c8dd4..f0d8ab1657 100644 --- a/tvix/shell.nix +++ b/tvix/shell.nix @@ -29,12 +29,10 @@ pkgs.mkShell { pkgs.cargo pkgs.cargo-machete pkgs.cargo-expand - pkgs.cbtemulator pkgs.clippy pkgs.evans pkgs.fuse pkgs.go - pkgs.google-cloud-bigtable-tool pkgs.grpcurl pkgs.hyperfine pkgs.mdbook diff --git a/tvix/store/Cargo.toml b/tvix/store/Cargo.toml index b549eeb7f5..26bd249b12 100644 --- a/tvix/store/Cargo.toml +++ b/tvix/store/Cargo.toml @@ -5,6 +5,7 @@ edition = "2021" [dependencies] anyhow = "1.0.68" +async-compression = { version = "0.4.9", features = ["tokio", "bzip2", "gzip", "xz", "zstd"]} async-stream = "0.3.5" blake3 = { version = "1.3.1", features = ["rayon", "std"] } bstr = "1.6.0" @@ -39,9 +40,9 @@ tracing-subscriber = { version = "0.3.16", features = ["env-filter", "json"] } tvix-castore = { path = "../castore" } url = "2.4.0" walkdir = "2.4.0" -async-recursion = "1.0.5" reqwest = { version = "0.11.22", features = ["rustls-tls-native-roots", "stream"], default-features = false } -xz2 = "0.1.7" +lru = "0.12.3" +parking_lot = "0.12.2" [dependencies.tonic-reflection] optional = true @@ -74,3 +75,10 @@ fuse = ["tvix-castore/fuse"] otlp = ["dep:opentelemetry", "dep:opentelemetry-otlp", "dep:opentelemetry_sdk"] tonic-reflection = ["dep:tonic-reflection", "tvix-castore/tonic-reflection"] virtiofs = ["tvix-castore/virtiofs"] +# Whether to run the integration tests. +# Requires the following packages in $PATH: +# cbtemulator, google-cloud-bigtable-tool +integration = [] + +[lints] +workspace = true diff --git a/tvix/store/default.nix b/tvix/store/default.nix index f30923ac27..ad47994f24 100644 --- a/tvix/store/default.nix +++ b/tvix/store/default.nix @@ -26,7 +26,6 @@ in runTests = true; testPreRun = '' export SSL_CERT_FILE=${pkgs.cacert}/etc/ssl/certs/ca-bundle.crt - export PATH="$PATH:${pkgs.lib.makeBinPath [pkgs.cbtemulator pkgs.google-cloud-bigtable-tool]}" ''; # enable some optional features. @@ -34,7 +33,20 @@ in # virtiofs feature currently fails to build on Darwin. 
++ pkgs.lib.optional pkgs.stdenv.isLinux "virtiofs"; }).overrideAttrs (_: { + meta.ci.targets = [ "integration-tests" ]; meta.ci.extraSteps = { import-docs = (mkImportCheck "tvix/store/docs" ./docs); }; + passthru.integration-tests = depot.tvix.crates.workspaceMembers.tvix-store.build.override { + runTests = true; + testPreRun = '' + export SSL_CERT_FILE=${pkgs.cacert}/etc/ssl/certs/ca-bundle.crt + export PATH="$PATH:${pkgs.lib.makeBinPath [pkgs.cbtemulator pkgs.google-cloud-bigtable-tool]}" + ''; + + # enable some optional features. + features = [ "default" "cloud" "integration" ] + # virtiofs feature currently fails to build on Darwin. + ++ pkgs.lib.optional pkgs.stdenv.isLinux "virtiofs"; + }; }) diff --git a/tvix/store/src/bin/tvix-store.rs b/tvix/store/src/bin/tvix-store.rs index 4662cf67d5..906d0ab520 100644 --- a/tvix/store/src/bin/tvix-store.rs +++ b/tvix/store/src/bin/tvix-store.rs @@ -19,6 +19,7 @@ use tracing_subscriber::EnvFilter; use tracing_subscriber::Layer; use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt}; use tvix_castore::import::fs::ingest_path; +use tvix_store::nar::NarCalculationService; use tvix_store::proto::NarInfo; use tvix_store::proto::PathInfo; @@ -78,7 +79,11 @@ enum Commands { #[arg(long, short = 'l')] listen_address: Option<String>, - #[arg(long, env, default_value = "sled:///var/lib/tvix-store/blobs.sled")] + #[arg( + long, + env, + default_value = "objectstore+file:///var/lib/tvix-store/blobs.object_store" + )] blob_service_addr: String, #[arg( @@ -282,7 +287,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> { path_info_service_addr, } => { // initialize stores - let (blob_service, directory_service, path_info_service) = + let (blob_service, directory_service, path_info_service, nar_calculation_service) = tvix_store::utils::construct_services( blob_service_addr, directory_service_addr, @@ -307,6 +312,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> { )) .add_service(PathInfoServiceServer::new(GRPCPathInfoServiceWrapper::new( Arc::from(path_info_service), + nar_calculation_service, ))); #[cfg(feature = "tonic-reflection")] @@ -336,7 +342,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> { path_info_service_addr, } => { // FUTUREWORK: allow flat for single files? - let (blob_service, directory_service, path_info_service) = + let (blob_service, directory_service, path_info_service, nar_calculation_service) = tvix_store::utils::construct_services( blob_service_addr, directory_service_addr, @@ -344,8 +350,10 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> { ) .await?; - // Arc the PathInfoService, as we clone it . + // Arc PathInfoService and NarCalculationService, as we clone it . 
let path_info_service: Arc<dyn PathInfoService> = path_info_service.into(); + let nar_calculation_service: Arc<dyn NarCalculationService> = + nar_calculation_service.into(); let tasks = paths .into_iter() @@ -354,6 +362,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> { let blob_service = blob_service.clone(); let directory_service = directory_service.clone(); let path_info_service = path_info_service.clone(); + let nar_calculation_service = nar_calculation_service.clone(); async move { if let Ok(name) = tvix_store::import::path_to_name(&path) { @@ -363,6 +372,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> { blob_service, directory_service, path_info_service, + nar_calculation_service, ) .await; if let Ok(output_path) = resp { @@ -383,7 +393,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> { path_info_service_addr, reference_graph_path, } => { - let (blob_service, directory_service, path_info_service) = + let (blob_service, directory_service, path_info_service, _nar_calculation_service) = tvix_store::utils::construct_services( blob_service_addr, directory_service_addr, @@ -490,7 +500,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> { allow_other, show_xattr, } => { - let (blob_service, directory_service, path_info_service) = + let (blob_service, directory_service, path_info_service, _nar_calculation_service) = tvix_store::utils::construct_services( blob_service_addr, directory_service_addr, @@ -532,7 +542,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> { list_root, show_xattr, } => { - let (blob_service, directory_service, path_info_service) = + let (blob_service, directory_service, path_info_service, _nar_calculation_service) = tvix_store::utils::construct_services( blob_service_addr, directory_service_addr, diff --git a/tvix/store/src/import.rs b/tvix/store/src/import.rs index 7b6aeb824e..888380bca9 100644 --- a/tvix/store/src/import.rs +++ b/tvix/store/src/import.rs @@ -11,6 +11,7 @@ use nix_compat::{ }; use crate::{ + nar::NarCalculationService, pathinfoservice::PathInfoService, proto::{nar_info, NarInfo, PathInfo}, }; @@ -104,23 +105,27 @@ pub fn derive_nar_ca_path_info( /// Ingest the given path `path` and register the resulting output path in the /// [`PathInfoService`] as a recursive fixed output NAR. #[instrument(skip_all, fields(store_name=name, path=?path), err)] -pub async fn import_path_as_nar_ca<BS, DS, PS, P>( +pub async fn import_path_as_nar_ca<BS, DS, PS, NS, P>( path: P, name: &str, blob_service: BS, directory_service: DS, path_info_service: PS, + nar_calculation_service: NS, ) -> Result<StorePath, std::io::Error> where P: AsRef<Path> + std::fmt::Debug, BS: BlobService + Clone, - DS: AsRef<dyn DirectoryService>, + DS: DirectoryService, PS: AsRef<dyn PathInfoService>, + NS: NarCalculationService, { - let root_node = ingest_path(blob_service, directory_service, path.as_ref()).await?; + let root_node = ingest_path(blob_service, directory_service, path.as_ref()) + .await + .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?; - // Ask the PathInfoService for the NAR size and sha256 - let (nar_size, nar_sha256) = path_info_service.as_ref().calculate_nar(&root_node).await?; + // Ask for the NAR size and sha256 + let (nar_size, nar_sha256) = nar_calculation_service.calculate_nar(&root_node).await?; // Calculate the output path. This might still fail, as some names are illegal. 
// FUTUREWORK: express the `name` at the type level to be valid and move the conversion diff --git a/tvix/store/src/nar/import.rs b/tvix/store/src/nar/import.rs index 6f4dcdea5d..3d7c50014a 100644 --- a/tvix/store/src/nar/import.rs +++ b/tvix/store/src/nar/import.rs @@ -1,225 +1,122 @@ -use bytes::Bytes; -use nix_compat::nar; -use std::io::{self, BufRead}; -use tokio_util::io::SyncIoBridge; -use tracing::warn; +use nix_compat::nar::reader::r#async as nar_reader; +use tokio::{io::AsyncBufRead, sync::mpsc, try_join}; use tvix_castore::{ blobservice::BlobService, - directoryservice::{DirectoryPutter, DirectoryService}, - proto::{self as castorepb}, - B3Digest, + directoryservice::DirectoryService, + import::{ingest_entries, IngestionEntry, IngestionError}, + proto::{node::Node, NamedNode}, + PathBuf, }; -/// Accepts a reader providing a NAR. -/// Will traverse it, uploading blobs to the given [BlobService], and -/// directories to the given [DirectoryService]. -/// On success, the root node is returned. -/// This function is not async (because the NAR reader is not) -/// and calls [tokio::task::block_in_place] when interacting with backing -/// services, so make sure to only call this with spawn_blocking. -pub fn read_nar<R, BS, DS>( - r: &mut R, +/// Ingests the contents from a [AsyncRead] providing NAR into the tvix store, +/// interacting with a [BlobService] and [DirectoryService]. +/// It returns the castore root node or an error. +pub async fn ingest_nar<R, BS, DS>( blob_service: BS, directory_service: DS, -) -> io::Result<castorepb::node::Node> + r: &mut R, +) -> Result<Node, IngestionError<Error>> where - R: BufRead + Send, - BS: AsRef<dyn BlobService>, - DS: AsRef<dyn DirectoryService>, + R: AsyncBufRead + Unpin + Send, + BS: BlobService + Clone, + DS: DirectoryService, { - let handle = tokio::runtime::Handle::current(); + // open the NAR for reading. + // The NAR reader emits nodes in DFS preorder. + let root_node = nar_reader::open(r).await.map_err(Error::IO)?; - let directory_putter = directory_service.as_ref().put_multiple_start(); + let (tx, rx) = mpsc::channel(1); + let rx = tokio_stream::wrappers::ReceiverStream::new(rx); - let node = nix_compat::nar::reader::open(r)?; - let (root_node, mut directory_putter, _) = process_node( - handle.clone(), - "".into(), // this is the root node, it has an empty name - node, - &blob_service, - directory_putter, - )?; + let produce = async move { + let res = produce_nar_inner( + blob_service, + root_node, + "root".parse().unwrap(), // HACK: the root node sent to ingest_entries may not be ROOT. + tx.clone(), + ) + .await; - // In case the root node points to a directory, we need to close - // [directory_putter], and ensure the digest we got back from there matches - // what the root node is pointing to. - if let castorepb::node::Node::Directory(ref directory_node) = root_node { - // Close directory_putter to make sure all directories have been inserted. 
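The new ingest_nar above replaces this blocking, handle-juggling design with a producer/consumer split: the producer walks the NAR (DFS preorder) and pushes IngestionEntry values into a bounded mpsc channel, ingest_entries consumes them as a stream, and try_join! fails fast if either side errors. A stripped-down sketch of that shape, with a toy element type standing in for IngestionEntry:

    use tokio::{sync::mpsc, try_join};
    use tokio_stream::{wrappers::ReceiverStream, StreamExt};

    async fn pipeline() -> std::io::Result<Vec<u32>> {
        let (tx, rx) = mpsc::channel::<std::io::Result<u32>>(1);
        let rx = ReceiverStream::new(rx);

        // Producer: a send error means the consumer hung up.
        let produce = async move {
            for i in 0..3 {
                tx.send(Ok(i)).await.map_err(|e| {
                    std::io::Error::new(std::io::ErrorKind::BrokenPipe, e)
                })?;
            }
            Ok::<_, std::io::Error>(())
        };

        // Consumer: drains the stream, propagating in-band errors.
        let consume = async move {
            let mut rx = rx;
            let mut out = Vec::new();
            while let Some(item) = rx.next().await {
                out.push(item?);
            }
            Ok::<_, std::io::Error>(out)
        };

        let ((), out) = try_join!(produce, consume)?;
        Ok(out)
    }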
- let directory_putter_digest = - handle.block_on(handle.spawn(async move { directory_putter.close().await }))??; - let root_directory_node_digest: B3Digest = - directory_node.digest.clone().try_into().unwrap(); + tx.send(res) + .await + .map_err(|e| Error::IO(std::io::Error::new(std::io::ErrorKind::BrokenPipe, e)))?; - if directory_putter_digest != root_directory_node_digest { - warn!( - root_directory_node_digest = %root_directory_node_digest, - directory_putter_digest =%directory_putter_digest, - "directory digest mismatch", - ); - return Err(io::Error::new( - io::ErrorKind::Other, - "directory digest mismatch", - )); - } - } - // In case it's not a Directory, [directory_putter] doesn't need to be - // closed (as we didn't end up uploading anything). - // It can just be dropped, as documented in its trait. + Ok(()) + }; + + let consume = ingest_entries(directory_service, rx); - Ok(root_node) + let (_, node) = try_join!(produce, consume)?; + + // remove the fake "root" name again + debug_assert_eq!(&node.get_name(), b"root"); + Ok(node.rename("".into())) } -/// This is called on a [nar::reader::Node] and returns a [castorepb::node::Node]. -/// It does so by handling all three kinds, and recursing for directories. -/// -/// [DirectoryPutter] is passed around, so a single instance of it can be used, -/// which is sufficient, as this reads through the whole NAR linerarly. -fn process_node<BS>( - handle: tokio::runtime::Handle, - name: bytes::Bytes, - node: nar::reader::Node, +async fn produce_nar_inner<BS>( blob_service: BS, - directory_putter: Box<dyn DirectoryPutter>, -) -> io::Result<(castorepb::node::Node, Box<dyn DirectoryPutter>, BS)> + node: nar_reader::Node<'_, '_>, + path: PathBuf, + tx: mpsc::Sender<Result<IngestionEntry, Error>>, +) -> Result<IngestionEntry, Error> where - BS: AsRef<dyn BlobService>, + BS: BlobService + Clone, { Ok(match node { - nar::reader::Node::Symlink { target } => ( - castorepb::node::Node::Symlink(castorepb::SymlinkNode { - name, - target: target.into(), - }), - directory_putter, - blob_service, - ), - nar::reader::Node::File { executable, reader } => ( - castorepb::node::Node::File(process_file_reader( - handle, - name, - reader, + nar_reader::Node::Symlink { target } => IngestionEntry::Symlink { path, target }, + nar_reader::Node::File { + executable, + mut reader, + } => { + let (digest, size) = { + let mut blob_writer = blob_service.open_write().await; + let size = tokio::io::copy_buf(&mut reader, &mut blob_writer).await?; + + (blob_writer.close().await?, size) + }; + + IngestionEntry::Regular { + path, + size, executable, - &blob_service, - )?), - directory_putter, - blob_service, - ), - nar::reader::Node::Directory(dir_reader) => { - let (directory_node, directory_putter, blob_service_back) = - process_dir_reader(handle, name, dir_reader, blob_service, directory_putter)?; - - ( - castorepb::node::Node::Directory(directory_node), - directory_putter, - blob_service_back, - ) + digest, + } + } + nar_reader::Node::Directory(mut dir_reader) => { + while let Some(entry) = dir_reader.next().await? 
{ + let mut path = path.clone(); + + // valid NAR names are valid castore names + path.try_push(entry.name) + .expect("Tvix bug: failed to join name"); + + let entry = Box::pin(produce_nar_inner( + blob_service.clone(), + entry.node, + path, + tx.clone(), + )) + .await?; + + tx.send(Ok(entry)).await.map_err(|e| { + Error::IO(std::io::Error::new(std::io::ErrorKind::BrokenPipe, e)) + })?; + } + + IngestionEntry::Dir { path } } }) } -/// Given a name and [nar::reader::FileReader], this ingests the file into the -/// passed [BlobService] and returns a [castorepb::FileNode]. -fn process_file_reader<BS>( - handle: tokio::runtime::Handle, - name: Bytes, - mut file_reader: nar::reader::FileReader, - executable: bool, - blob_service: BS, -) -> io::Result<castorepb::FileNode> -where - BS: AsRef<dyn BlobService>, -{ - // store the length. If we read any other length, reading will fail. - let expected_len = file_reader.len(); - - // prepare writing a new blob. - let blob_writer = handle.block_on(async { blob_service.as_ref().open_write().await }); - - // write the blob. - let mut blob_writer = { - let mut dst = SyncIoBridge::new(blob_writer); - - file_reader.copy(&mut dst)?; - dst.shutdown()?; - - // return back the blob_writer - dst.into_inner() - }; - - // close the blob_writer, retrieve the digest. - let blob_digest = handle.block_on(async { blob_writer.close().await })?; - - Ok(castorepb::FileNode { - name, - digest: blob_digest.into(), - size: expected_len, - executable, - }) -} - -/// Given a name and [nar::reader::DirReader], this returns a [castorepb::DirectoryNode]. -/// It uses [process_node] to iterate over all children. -/// -/// [DirectoryPutter] is passed around, so a single instance of it can be used, -/// which is sufficient, as this reads through the whole NAR linerarly. -fn process_dir_reader<BS>( - handle: tokio::runtime::Handle, - name: Bytes, - mut dir_reader: nar::reader::DirReader, - blob_service: BS, - directory_putter: Box<dyn DirectoryPutter>, -) -> io::Result<(castorepb::DirectoryNode, Box<dyn DirectoryPutter>, BS)> -where - BS: AsRef<dyn BlobService>, -{ - let mut directory = castorepb::Directory::default(); - - let mut directory_putter = directory_putter; - let mut blob_service = blob_service; - while let Some(entry) = dir_reader.next()? { - let (node, directory_putter_back, blob_service_back) = process_node( - handle.clone(), - entry.name.into(), - entry.node, - blob_service, - directory_putter, - )?; - - blob_service = blob_service_back; - directory_putter = directory_putter_back; - - match node { - castorepb::node::Node::Directory(node) => directory.directories.push(node), - castorepb::node::Node::File(node) => directory.files.push(node), - castorepb::node::Node::Symlink(node) => directory.symlinks.push(node), - } - } - - // calculate digest and size. - let directory_digest = directory.digest(); - let directory_size = directory.size(); - - // upload the directory. This is a bit more verbose, as we want to get back - // directory_putter for later reuse. 
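Note the recursion style in produce_nar_inner above: a directly recursive async fn would have an infinitely sized future, so the recursive call is boxed with Box::pin at the call site. The same technique replaces the #[async_recursion] macro in walk_node further down. In isolation:

    // Without Box::pin this fails with "recursion in an `async fn`
    // requires boxing"; pinning the boxed future makes it awaitable.
    async fn depth(n: u32) -> u32 {
        if n == 0 {
            0
        } else {
            1 + Box::pin(depth(n - 1)).await
        }
    }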
- let directory_putter = handle.block_on(handle.spawn(async move { - directory_putter.put(directory).await?; - Ok::<_, io::Error>(directory_putter) - }))??; - - Ok(( - castorepb::DirectoryNode { - name, - digest: directory_digest.into(), - size: directory_size, - }, - directory_putter, - blob_service, - )) +#[derive(Debug, thiserror::Error)] +pub enum Error { + #[error(transparent)] + IO(#[from] std::io::Error), } #[cfg(test)] mod test { - use crate::nar::read_nar; + use crate::nar::ingest_nar; use std::io::Cursor; use std::sync::Arc; @@ -244,19 +141,13 @@ mod test { blob_service: Arc<dyn BlobService>, directory_service: Arc<dyn DirectoryService>, ) { - let handle = tokio::runtime::Handle::current(); - - let root_node = handle - .spawn_blocking(|| { - read_nar( - &mut Cursor::new(&NAR_CONTENTS_SYMLINK.clone()), - blob_service, - directory_service, - ) - }) - .await - .unwrap() - .expect("must parse"); + let root_node = ingest_nar( + blob_service, + directory_service, + &mut Cursor::new(&NAR_CONTENTS_SYMLINK.clone()), + ) + .await + .expect("must parse"); assert_eq!( castorepb::node::Node::Symlink(castorepb::SymlinkNode { @@ -273,22 +164,13 @@ mod test { blob_service: Arc<dyn BlobService>, directory_service: Arc<dyn DirectoryService>, ) { - let handle = tokio::runtime::Handle::current(); - - let root_node = handle - .spawn_blocking({ - let blob_service = blob_service.clone(); - move || { - read_nar( - &mut Cursor::new(&NAR_CONTENTS_HELLOWORLD.clone()), - blob_service, - directory_service, - ) - } - }) - .await - .unwrap() - .expect("must parse"); + let root_node = ingest_nar( + blob_service.clone(), + directory_service, + &mut Cursor::new(&NAR_CONTENTS_HELLOWORLD.clone()), + ) + .await + .expect("must parse"); assert_eq!( castorepb::node::Node::File(castorepb::FileNode { @@ -310,23 +192,13 @@ mod test { blob_service: Arc<dyn BlobService>, directory_service: Arc<dyn DirectoryService>, ) { - let handle = tokio::runtime::Handle::current(); - - let root_node = handle - .spawn_blocking({ - let blob_service = blob_service.clone(); - let directory_service = directory_service.clone(); - || { - read_nar( - &mut Cursor::new(&NAR_CONTENTS_COMPLICATED.clone()), - blob_service, - directory_service, - ) - } - }) - .await - .unwrap() - .expect("must parse"); + let root_node = ingest_nar( + blob_service.clone(), + directory_service.clone(), + &mut Cursor::new(&NAR_CONTENTS_COMPLICATED.clone()), + ) + .await + .expect("must parse"); assert_eq!( castorepb::node::Node::Directory(castorepb::DirectoryNode { diff --git a/tvix/store/src/nar/mod.rs b/tvix/store/src/nar/mod.rs index 49bb92fb0f..164748a655 100644 --- a/tvix/store/src/nar/mod.rs +++ b/tvix/store/src/nar/mod.rs @@ -1,10 +1,36 @@ +use tonic::async_trait; use tvix_castore::B3Digest; mod import; mod renderer; -pub use import::read_nar; +pub use import::ingest_nar; pub use renderer::calculate_size_and_sha256; pub use renderer::write_nar; +pub use renderer::SimpleRenderer; +use tvix_castore::proto as castorepb; + +#[async_trait] +pub trait NarCalculationService: Send + Sync { + /// Return the nar size and nar sha256 digest for a given root node. + /// This can be used to calculate NAR-based output paths. 
+ async fn calculate_nar( + &self, + root_node: &castorepb::node::Node, + ) -> Result<(u64, [u8; 32]), tvix_castore::Error>; +} + +#[async_trait] +impl<A> NarCalculationService for A +where + A: AsRef<dyn NarCalculationService> + Send + Sync, +{ + async fn calculate_nar( + &self, + root_node: &castorepb::node::Node, + ) -> Result<(u64, [u8; 32]), tvix_castore::Error> { + self.as_ref().calculate_nar(root_node).await + } +} /// Errors that can encounter while rendering NARs. #[derive(Debug, thiserror::Error)] diff --git a/tvix/store/src/nar/renderer.rs b/tvix/store/src/nar/renderer.rs index 0816b8e973..efd67671db 100644 --- a/tvix/store/src/nar/renderer.rs +++ b/tvix/store/src/nar/renderer.rs @@ -1,17 +1,51 @@ use crate::utils::AsyncIoBridge; -use super::RenderError; -use async_recursion::async_recursion; +use super::{NarCalculationService, RenderError}; use count_write::CountWrite; use nix_compat::nar::writer::r#async as nar_writer; use sha2::{Digest, Sha256}; use tokio::io::{self, AsyncWrite, BufReader}; +use tonic::async_trait; use tvix_castore::{ blobservice::BlobService, directoryservice::DirectoryService, proto::{self as castorepb, NamedNode}, }; +pub struct SimpleRenderer<BS, DS> { + blob_service: BS, + directory_service: DS, +} + +impl<BS, DS> SimpleRenderer<BS, DS> { + pub fn new(blob_service: BS, directory_service: DS) -> Self { + Self { + blob_service, + directory_service, + } + } +} + +#[async_trait] +impl<BS, DS> NarCalculationService for SimpleRenderer<BS, DS> +where + BS: BlobService + Clone, + DS: DirectoryService + Clone, +{ + async fn calculate_nar( + &self, + root_node: &castorepb::node::Node, + ) -> Result<(u64, [u8; 32]), tvix_castore::Error> { + calculate_size_and_sha256( + root_node, + self.blob_service.clone(), + self.directory_service.clone(), + ) + .await + .map_err(|e| tvix_castore::Error::StorageError(format!("failed rendering nar: {}", e))) + } +} + /// Invoke [write_nar], and return the size and sha256 digest of the produced /// NAR output. pub async fn calculate_size_and_sha256<BS, DS>( @@ -72,9 +106,8 @@ where /// Process an intermediate node in the structure. /// This consumes the node. -#[async_recursion] async fn walk_node<BS, DS>( - nar_node: nar_writer::Node<'async_recursion, '_>, + nar_node: nar_writer::Node<'_, '_>, proto_node: &castorepb::node::Node, blob_service: BS, directory_service: DS, @@ -164,9 +197,13 @@ where .await .map_err(RenderError::NARWriterError)?; - (blob_service, directory_service) = - walk_node(child_node, &proto_node, blob_service, directory_service) - .await?; + (blob_service, directory_service) = Box::pin(walk_node( + child_node, + &proto_node, + blob_service, + directory_service, + )) + .await?; } // close the directory diff --git a/tvix/store/src/pathinfoservice/bigtable.rs b/tvix/store/src/pathinfoservice/bigtable.rs index 6fb52abbfd..7df9989fc5 100644 --- a/tvix/store/src/pathinfoservice/bigtable.rs +++ b/tvix/store/src/pathinfoservice/bigtable.rs @@ -6,12 +6,12 @@ use bigtable_rs::{bigtable, google::bigtable::v2 as bigtable_v2}; use bytes::Bytes; use data_encoding::HEXLOWER; use futures::stream::BoxStream; +use nix_compat::nixbase32; use prost::Message; use serde::{Deserialize, Serialize}; use serde_with::{serde_as, DurationSeconds}; use tonic::async_trait; -use tracing::trace; -use tvix_castore::proto as castorepb; +use tracing::{instrument, trace}; use tvix_castore::Error; /// There should not be more than 10 MiB in a single cell. 
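To summarize the extraction above: NAR size/digest calculation moves off PathInfoService into a dedicated NarCalculationService trait, with SimpleRenderer as the render-it-locally implementation and a blanket impl delegating through anything that is AsRef<dyn NarCalculationService>. A hedged usage sketch (names from the hunks above; the surrounding wiring is assumed):

    use std::sync::Arc;
    use tvix_castore::proto as castorepb;

    // Thanks to the blanket impl, an Arc'd trait object can be passed
    // wherever `NS: NarCalculationService` is expected.
    async fn nar_size_and_digest(
        svc: Arc<dyn NarCalculationService>,
        root_node: &castorepb::node::Node,
    ) -> Result<(u64, [u8; 32]), tvix_castore::Error> {
        svc.calculate_nar(root_node).await
    }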
@@ -182,6 +182,7 @@ fn derive_pathinfo_key(digest: &[u8; 20]) -> String { #[async_trait] impl PathInfoService for BigtablePathInfoService { + #[instrument(level = "trace", skip_all, fields(path_info.digest = nixbase32::encode(&digest)))] async fn get(&self, digest: [u8; 20]) -> Result<Option<PathInfo>, Error> { let mut client = self.client.clone(); let path_info_key = derive_pathinfo_key(&digest); @@ -278,6 +279,7 @@ impl PathInfoService for BigtablePathInfoService { Ok(Some(path_info)) } + #[instrument(level = "trace", skip_all, fields(path_info.root_node = ?path_info.node))] async fn put(&self, path_info: PathInfo) -> Result<PathInfo, Error> { let store_path = path_info .validate() @@ -330,13 +332,6 @@ impl PathInfoService for BigtablePathInfoService { Ok(path_info) } - async fn calculate_nar( - &self, - _root_node: &castorepb::node::Node, - ) -> Result<(u64, [u8; 32]), Error> { - return Err(Error::StorageError("unimplemented".into())); - } - fn list(&self) -> BoxStream<'static, Result<PathInfo, Error>> { let mut client = self.client.clone(); diff --git a/tvix/store/src/pathinfoservice/combinators.rs b/tvix/store/src/pathinfoservice/combinators.rs new file mode 100644 index 0000000000..664144ef49 --- /dev/null +++ b/tvix/store/src/pathinfoservice/combinators.rs @@ -0,0 +1,111 @@ +use crate::proto::PathInfo; +use futures::stream::BoxStream; +use nix_compat::nixbase32; +use tonic::async_trait; +use tracing::{debug, instrument}; +use tvix_castore::Error; + +use super::PathInfoService; + +/// Asks near first, if not found, asks far. +/// If found in there, returns it, and *inserts* it into +/// near. +/// There is no negative cache. +/// Inserts and listings are not implemented for now. +pub struct Cache<PS1, PS2> { + near: PS1, + far: PS2, +} + +impl<PS1, PS2> Cache<PS1, PS2> { + pub fn new(near: PS1, far: PS2) -> Self { + Self { near, far } + } +} + +#[async_trait] +impl<PS1, PS2> PathInfoService for Cache<PS1, PS2> +where + PS1: PathInfoService, + PS2: PathInfoService, +{ + #[instrument(level = "trace", skip_all, fields(path_info.digest = nixbase32::encode(&digest)))] + async fn get(&self, digest: [u8; 20]) -> Result<Option<PathInfo>, Error> { + match self.near.get(digest).await? { + Some(path_info) => { + debug!("serving from cache"); + Ok(Some(path_info)) + } + None => { + debug!("not found in near, asking remote…"); + match self.far.get(digest).await? { + None => Ok(None), + Some(path_info) => { + debug!("found in remote, adding to cache"); + self.near.put(path_info.clone()).await?; + Ok(Some(path_info)) + } + } + } + } + } + + async fn put(&self, _path_info: PathInfo) -> Result<PathInfo, Error> { + Err(Error::StorageError("unimplemented".to_string())) + } + + fn list(&self) -> BoxStream<'static, Result<PathInfo, Error>> { + Box::pin(tokio_stream::once(Err(Error::StorageError( + "unimplemented".to_string(), + )))) + } +} + +#[cfg(test)] +mod test { + use std::num::NonZeroUsize; + + use crate::{ + pathinfoservice::{LruPathInfoService, MemoryPathInfoService, PathInfoService}, + tests::fixtures::PATH_INFO_WITH_NARINFO, + }; + + const PATH_INFO_DIGEST: [u8; 20] = [0; 20]; + + /// Helper function setting up an instance of a "far" and "near" + /// PathInfoService. + async fn create_pathinfoservice() -> super::Cache<LruPathInfoService, MemoryPathInfoService> { + // Create an instance of a "far" PathInfoService. + let far = MemoryPathInfoService::default(); + + // … and an instance of a "near" PathInfoService. 
+ let near = LruPathInfoService::with_capacity(NonZeroUsize::new(1).unwrap()); + + // create a Pathinfoservice combining the two and return it. + super::Cache::new(near, far) + } + + /// Getting from the far backend is gonna insert it into the near one. + #[tokio::test] + async fn test_populate_cache() { + let svc = create_pathinfoservice().await; + + // query the PathInfo, things should not be there. + assert!(svc.get(PATH_INFO_DIGEST).await.unwrap().is_none()); + + // insert it into the far one. + svc.far.put(PATH_INFO_WITH_NARINFO.clone()).await.unwrap(); + + // now try getting it again, it should succeed. + assert_eq!( + Some(PATH_INFO_WITH_NARINFO.clone()), + svc.get(PATH_INFO_DIGEST).await.unwrap() + ); + + // peek near, it should now be there. + assert_eq!( + Some(PATH_INFO_WITH_NARINFO.clone()), + svc.near.get(PATH_INFO_DIGEST).await.unwrap() + ); + } +} diff --git a/tvix/store/src/pathinfoservice/from_addr.rs b/tvix/store/src/pathinfoservice/from_addr.rs index 1ff822ad35..455909e7f2 100644 --- a/tvix/store/src/pathinfoservice/from_addr.rs +++ b/tvix/store/src/pathinfoservice/from_addr.rs @@ -47,7 +47,7 @@ pub async fn from_addr( if url.has_host() || !url.path().is_empty() { return Err(Error::StorageError("invalid url".to_string())); } - Box::new(MemoryPathInfoService::new(blob_service, directory_service)) + Box::<MemoryPathInfoService>::default() } "sled" => { // sled doesn't support host, and a path can be provided (otherwise @@ -65,10 +65,10 @@ pub async fn from_addr( // TODO: expose other parameters as URL parameters? Box::new(if url.path().is_empty() { - SledPathInfoService::new_temporary(blob_service, directory_service) + SledPathInfoService::new_temporary() .map_err(|e| Error::StorageError(e.to_string()))? } else { - SledPathInfoService::new(url.path(), blob_service, directory_service) + SledPathInfoService::new(url.path()) .map_err(|e| Error::StorageError(e.to_string()))? }) } @@ -208,7 +208,7 @@ mod tests { #[case::grpc_invalid_host_and_path("grpc+http://localhost/some-path", false)] /// A valid example for Bigtable. 
#[cfg_attr( - feature = "cloud", + all(feature = "cloud", feature = "integration"), case::bigtable_valid( "bigtable://instance-1?project_id=project-1&table_name=table-1&family_name=cf1", true @@ -216,7 +216,7 @@ mod tests { )] /// An invalid example for Bigtable, missing fields #[cfg_attr( - feature = "cloud", + all(feature = "cloud", feature = "integration"), case::bigtable_invalid_missing_fields("bigtable://instance-1", false) )] #[tokio::test] diff --git a/tvix/store/src/pathinfoservice/grpc.rs b/tvix/store/src/pathinfoservice/grpc.rs index 1138ebdc19..93d2d67c31 100644 --- a/tvix/store/src/pathinfoservice/grpc.rs +++ b/tvix/store/src/pathinfoservice/grpc.rs @@ -1,8 +1,11 @@ use super::PathInfoService; -use crate::proto::{self, ListPathInfoRequest, PathInfo}; +use crate::{ + nar::NarCalculationService, + proto::{self, ListPathInfoRequest, PathInfo}, +}; use async_stream::try_stream; -use data_encoding::BASE64; use futures::stream::BoxStream; +use nix_compat::nixbase32; use tonic::{async_trait, transport::Channel, Code}; use tracing::instrument; use tvix_castore::{proto as castorepb, Error}; @@ -27,7 +30,7 @@ impl GRPCPathInfoService { #[async_trait] impl PathInfoService for GRPCPathInfoService { - #[instrument(level = "trace", skip_all, fields(path_info.digest = BASE64.encode(&digest)))] + #[instrument(level = "trace", skip_all, fields(path_info.digest = nixbase32::encode(&digest)))] async fn get(&self, digest: [u8; 20]) -> Result<Option<PathInfo>, Error> { let path_info = self .grpc_client @@ -67,30 +70,6 @@ impl PathInfoService for GRPCPathInfoService { Ok(path_info) } - #[instrument(level = "trace", skip_all, fields(root_node = ?root_node))] - async fn calculate_nar( - &self, - root_node: &castorepb::node::Node, - ) -> Result<(u64, [u8; 32]), Error> { - let path_info = self - .grpc_client - .clone() - .calculate_nar(castorepb::Node { - node: Some(root_node.clone()), - }) - .await - .map_err(|e| Error::StorageError(e.to_string()))? - .into_inner(); - - let nar_sha256: [u8; 32] = path_info - .nar_sha256 - .to_vec() - .try_into() - .map_err(|_e| Error::StorageError("invalid digest length".to_string()))?; - - Ok((path_info.nar_size, nar_sha256)) - } - #[instrument(level = "trace", skip_all)] fn list(&self) -> BoxStream<'static, Result<PathInfo, Error>> { let mut grpc_client = self.grpc_client.clone(); @@ -126,87 +105,45 @@ impl PathInfoService for GRPCPathInfoService { } } +#[async_trait] +impl NarCalculationService for GRPCPathInfoService { + #[instrument(level = "trace", skip_all, fields(root_node = ?root_node))] + async fn calculate_nar( + &self, + root_node: &castorepb::node::Node, + ) -> Result<(u64, [u8; 32]), Error> { + let path_info = self + .grpc_client + .clone() + .calculate_nar(castorepb::Node { + node: Some(root_node.clone()), + }) + .await + .map_err(|e| Error::StorageError(e.to_string()))? 
+ .into_inner(); + + let nar_sha256: [u8; 32] = path_info + .nar_sha256 + .to_vec() + .try_into() + .map_err(|_e| Error::StorageError("invalid digest length".to_string()))?; + + Ok((path_info.nar_size, nar_sha256)) + } +} + #[cfg(test)] mod tests { - use std::sync::Arc; - use std::time::Duration; - - use rstest::*; - use tempfile::TempDir; - use tokio::net::UnixListener; - use tokio_retry::strategy::ExponentialBackoff; - use tokio_retry::Retry; - use tokio_stream::wrappers::UnixListenerStream; - use tvix_castore::blobservice::BlobService; - use tvix_castore::directoryservice::DirectoryService; - - use crate::pathinfoservice::MemoryPathInfoService; - use crate::proto::path_info_service_client::PathInfoServiceClient; - use crate::proto::GRPCPathInfoServiceWrapper; - use crate::tests::fixtures::{self, blob_service, directory_service}; - - use super::GRPCPathInfoService; - use super::PathInfoService; + use crate::pathinfoservice::tests::make_grpc_path_info_service_client; + use crate::tests::fixtures; /// This ensures connecting via gRPC works as expected. - #[rstest] #[tokio::test] - async fn test_valid_unix_path_ping_pong( - blob_service: Arc<dyn BlobService>, - directory_service: Arc<dyn DirectoryService>, - ) { - let tmpdir = TempDir::new().unwrap(); - let socket_path = tmpdir.path().join("daemon"); - - let path_clone = socket_path.clone(); - - // Spin up a server - tokio::spawn(async { - let uds = UnixListener::bind(path_clone).unwrap(); - let uds_stream = UnixListenerStream::new(uds); - - // spin up a new server - let mut server = tonic::transport::Server::builder(); - let router = server.add_service( - crate::proto::path_info_service_server::PathInfoServiceServer::new( - GRPCPathInfoServiceWrapper::new(Box::new(MemoryPathInfoService::new( - blob_service, - directory_service, - )) - as Box<dyn PathInfoService>), - ), - ); - router.serve_with_incoming(uds_stream).await - }); - - // wait for the socket to be created - Retry::spawn( - ExponentialBackoff::from_millis(20).max_delay(Duration::from_secs(10)), - || async { - if socket_path.exists() { - Ok(()) - } else { - Err(()) - } - }, - ) - .await - .expect("failed to wait for socket"); - - // prepare a client - let grpc_client = { - let url = url::Url::parse(&format!("grpc+unix://{}", socket_path.display())) - .expect("must parse"); - let client = PathInfoServiceClient::new( - tvix_castore::tonic::channel_from_url(&url) - .await - .expect("must succeed"), - ); - - GRPCPathInfoService::from_client(client) - }; + async fn test_valid_unix_path_ping_pong() { + let (_blob_service, _directory_service, path_info_service) = + make_grpc_path_info_service_client().await; - let path_info = grpc_client + let path_info = path_info_service .get(fixtures::DUMMY_PATH_DIGEST) .await .expect("must not be error"); diff --git a/tvix/store/src/pathinfoservice/lru.rs b/tvix/store/src/pathinfoservice/lru.rs new file mode 100644 index 0000000000..da674f497a --- /dev/null +++ b/tvix/store/src/pathinfoservice/lru.rs @@ -0,0 +1,128 @@ +use async_stream::try_stream; +use futures::stream::BoxStream; +use lru::LruCache; +use nix_compat::nixbase32; +use std::num::NonZeroUsize; +use std::sync::Arc; +use tokio::sync::RwLock; +use tonic::async_trait; +use tracing::instrument; + +use crate::proto::PathInfo; +use tvix_castore::Error; + +use super::PathInfoService; + +pub struct LruPathInfoService { + lru: Arc<RwLock<LruCache<[u8; 20], PathInfo>>>, +} + +impl LruPathInfoService { + pub fn with_capacity(capacity: NonZeroUsize) -> Self { + Self { + lru: 
Arc::new(RwLock::new(LruCache::new(capacity))), + } + } +} + +#[async_trait] +impl PathInfoService for LruPathInfoService { + #[instrument(level = "trace", skip_all, fields(path_info.digest = nixbase32::encode(&digest)))] + async fn get(&self, digest: [u8; 20]) -> Result<Option<PathInfo>, Error> { + Ok(self.lru.write().await.get(&digest).cloned()) + } + + #[instrument(level = "trace", skip_all, fields(path_info.root_node = ?path_info.node))] + async fn put(&self, path_info: PathInfo) -> Result<PathInfo, Error> { + // call validate + let store_path = path_info + .validate() + .map_err(|e| Error::InvalidRequest(format!("invalid PathInfo: {}", e)))?; + + self.lru + .write() + .await + .put(*store_path.digest(), path_info.clone()); + + Ok(path_info) + } + + fn list(&self) -> BoxStream<'static, Result<PathInfo, Error>> { + let lru = self.lru.clone(); + Box::pin(try_stream! { + let lru = lru.read().await; + let it = lru.iter(); + + for (_k,v) in it { + yield v.clone() + } + }) + } +} + +#[cfg(test)] +mod test { + use std::num::NonZeroUsize; + + use crate::{ + pathinfoservice::{LruPathInfoService, PathInfoService}, + proto::PathInfo, + tests::fixtures::PATH_INFO_WITH_NARINFO, + }; + use lazy_static::lazy_static; + use tvix_castore::proto as castorepb; + + lazy_static! { + static ref PATHINFO_1: PathInfo = PATH_INFO_WITH_NARINFO.clone(); + static ref PATHINFO_1_DIGEST: [u8; 20] = [0; 20]; + static ref PATHINFO_2: PathInfo = { + let mut p = PATHINFO_1.clone(); + let root_node = p.node.as_mut().unwrap(); + if let castorepb::Node { node: Some(node) } = root_node { + let n = node.to_owned(); + *node = n.rename("11111111111111111111111111111111-dummy2".into()); + } else { + unreachable!() + } + p + }; + static ref PATHINFO_2_DIGEST: [u8; 20] = *(PATHINFO_2.validate().unwrap()).digest(); + } + + #[tokio::test] + async fn evict() { + let svc = LruPathInfoService::with_capacity(NonZeroUsize::new(1).unwrap()); + + // pathinfo_1 should not be there + assert!(svc + .get(*PATHINFO_1_DIGEST) + .await + .expect("no error") + .is_none()); + + // insert it + svc.put(PATHINFO_1.clone()).await.expect("no error"); + + // now it should be there. + assert_eq!( + Some(PATHINFO_1.clone()), + svc.get(*PATHINFO_1_DIGEST).await.expect("no error") + ); + + // insert pathinfo_2. This will evict pathinfo 1 + svc.put(PATHINFO_2.clone()).await.expect("no error"); + + // now pathinfo 2 should be there. + assert_eq!( + Some(PATHINFO_2.clone()), + svc.get(*PATHINFO_2_DIGEST).await.expect("no error") + ); + + // … but pathinfo 1 not anymore. 
+ assert!(svc + .get(*PATHINFO_1_DIGEST) + .await + .expect("no error") + .is_none()); + } +} diff --git a/tvix/store/src/pathinfoservice/memory.rs b/tvix/store/src/pathinfoservice/memory.rs index f8435dbbf8..3de3221df2 100644 --- a/tvix/store/src/pathinfoservice/memory.rs +++ b/tvix/store/src/pathinfoservice/memory.rs @@ -1,40 +1,24 @@ use super::PathInfoService; -use crate::{nar::calculate_size_and_sha256, proto::PathInfo}; -use futures::stream::{iter, BoxStream}; -use std::{ - collections::HashMap, - sync::{Arc, RwLock}, -}; +use crate::proto::PathInfo; +use async_stream::try_stream; +use futures::stream::BoxStream; +use nix_compat::nixbase32; +use std::{collections::HashMap, sync::Arc}; +use tokio::sync::RwLock; use tonic::async_trait; -use tvix_castore::proto as castorepb; +use tracing::instrument; use tvix_castore::Error; -use tvix_castore::{blobservice::BlobService, directoryservice::DirectoryService}; -pub struct MemoryPathInfoService<BS, DS> { +#[derive(Default)] +pub struct MemoryPathInfoService { db: Arc<RwLock<HashMap<[u8; 20], PathInfo>>>, - - blob_service: BS, - directory_service: DS, -} - -impl<BS, DS> MemoryPathInfoService<BS, DS> { - pub fn new(blob_service: BS, directory_service: DS) -> Self { - Self { - db: Default::default(), - blob_service, - directory_service, - } - } } #[async_trait] -impl<BS, DS> PathInfoService for MemoryPathInfoService<BS, DS> -where - BS: AsRef<dyn BlobService> + Send + Sync, - DS: AsRef<dyn DirectoryService> + Send + Sync, -{ +impl PathInfoService for MemoryPathInfoService { + #[instrument(level = "trace", skip_all, fields(path_info.digest = nixbase32::encode(&digest)))] async fn get(&self, digest: [u8; 20]) -> Result<Option<PathInfo>, Error> { - let db = self.db.read().unwrap(); + let db = self.db.read().await; match db.get(&digest) { None => Ok(None), @@ -42,6 +26,7 @@ where } } + #[instrument(level = "trace", skip_all, fields(path_info.root_node = ?path_info.node))] async fn put(&self, path_info: PathInfo) -> Result<PathInfo, Error> { // Call validate on the received PathInfo message. match path_info.validate() { @@ -53,7 +38,7 @@ where // In case the PathInfo is valid, and we were able to extract a NixPath, store it in the database. // This overwrites existing PathInfo objects. Ok(nix_path) => { - let mut db = self.db.write().unwrap(); + let mut db = self.db.write().await; db.insert(*nix_path.digest(), path_info.clone()); Ok(path_info) @@ -61,24 +46,16 @@ where } } - async fn calculate_nar( - &self, - root_node: &castorepb::node::Node, - ) -> Result<(u64, [u8; 32]), Error> { - calculate_size_and_sha256(root_node, &self.blob_service, &self.directory_service) - .await - .map_err(|e| Error::StorageError(e.to_string())) - } - fn list(&self) -> BoxStream<'static, Result<PathInfo, Error>> { - let db = self.db.read().unwrap(); + let db = self.db.clone(); - // Copy all elements into a list. - // This is a bit ugly, because we can't have db escape the lifetime - // of this function, but elements need to be returned owned anyways, and this in- - // memory impl is only for testing purposes anyways. - let items: Vec<_> = db.iter().map(|(_k, v)| Ok(v.clone())).collect(); + Box::pin(try_stream! 
{ + let db = db.read().await; + let it = db.iter(); - Box::pin(iter(items)) + for (_k, v) in it { + yield v.clone() + } + }) } } diff --git a/tvix/store/src/pathinfoservice/mod.rs b/tvix/store/src/pathinfoservice/mod.rs index c1a482bbb5..574bcc0b8b 100644 --- a/tvix/store/src/pathinfoservice/mod.rs +++ b/tvix/store/src/pathinfoservice/mod.rs @@ -1,5 +1,7 @@ +mod combinators; mod from_addr; mod grpc; +mod lru; mod memory; mod nix_http; mod sled; @@ -12,13 +14,14 @@ mod tests; use futures::stream::BoxStream; use tonic::async_trait; -use tvix_castore::proto as castorepb; use tvix_castore::Error; use crate::proto::PathInfo; +pub use self::combinators::Cache as CachePathInfoService; pub use self::from_addr::from_addr; pub use self::grpc::GRPCPathInfoService; +pub use self::lru::LruPathInfoService; pub use self::memory::MemoryPathInfoService; pub use self::nix_http::NixHTTPPathInfoService; pub use self::sled::SledPathInfoService; @@ -41,14 +44,6 @@ pub trait PathInfoService: Send + Sync { /// invalid messages. async fn put(&self, path_info: PathInfo) -> Result<PathInfo, Error>; - /// Return the nar size and nar sha256 digest for a given root node. - /// This can be used to calculate NAR-based output paths, - /// and implementations are encouraged to cache it. - async fn calculate_nar( - &self, - root_node: &castorepb::node::Node, - ) -> Result<(u64, [u8; 32]), Error>; - /// Iterate over all PathInfo objects in the store. /// Implementations can decide to disallow listing. /// @@ -72,13 +67,6 @@ where self.as_ref().put(path_info).await } - async fn calculate_nar( - &self, - root_node: &castorepb::node::Node, - ) -> Result<(u64, [u8; 32]), Error> { - self.as_ref().calculate_nar(root_node).await - } - fn list(&self) -> BoxStream<'static, Result<PathInfo, Error>> { self.as_ref().list() } diff --git a/tvix/store/src/pathinfoservice/nix_http.rs b/tvix/store/src/pathinfoservice/nix_http.rs index bdb0e2c3cb..cccd4805c6 100644 --- a/tvix/store/src/pathinfoservice/nix_http.rs +++ b/tvix/store/src/pathinfoservice/nix_http.rs @@ -1,6 +1,3 @@ -use std::io::{self, BufRead, Read, Write}; - -use data_encoding::BASE64; use futures::{stream::BoxStream, TryStreamExt}; use nix_compat::{ narinfo::{self, NarInfo}, @@ -8,7 +5,10 @@ use nix_compat::{ nixhash::NixHash, }; use reqwest::StatusCode; -use sha2::{digest::FixedOutput, Digest, Sha256}; +use sha2::Digest; +use std::io::{self, Write}; +use tokio::io::{AsyncRead, BufReader}; +use tokio_util::io::InspectReader; use tonic::async_trait; use tracing::{debug, instrument, warn}; use tvix_castore::{ @@ -32,8 +32,7 @@ use super::PathInfoService; /// /// The client is expected to be (indirectly) using the same [BlobService] and /// [DirectoryService], so able to fetch referred Directories and Blobs. -/// [PathInfoService::put] and [PathInfoService::calculate_nar] are not -/// implemented and return an error if called. +/// [PathInfoService::put] is not implemented and returns an error if called. /// TODO: what about reading from nix-cache-info? 
pub struct NixHTTPPathInfoService<BS, DS> { base_url: url::Url, @@ -71,7 +70,7 @@ where BS: AsRef<dyn BlobService> + Send + Sync + Clone + 'static, DS: AsRef<dyn DirectoryService> + Send + Sync + Clone + 'static, { - #[instrument(skip_all, err, fields(path.digest=BASE64.encode(&digest)))] + #[instrument(skip_all, err, fields(path.digest=nixbase32::encode(&digest)))] async fn get(&self, digest: [u8; 20]) -> Result<Option<PathInfo>, Error> { let narinfo_url = self .base_url @@ -171,85 +170,83 @@ where ))); } - // get an AsyncRead of the response body. - let async_r = tokio_util::io::StreamReader::new(resp.bytes_stream().map_err(|e| { + // get a reader of the response body. + let r = tokio_util::io::StreamReader::new(resp.bytes_stream().map_err(|e| { let e = e.without_url(); warn!(e=%e, "failed to get response body"); io::Error::new(io::ErrorKind::BrokenPipe, e.to_string()) })); - let sync_r = tokio_util::io::SyncIoBridge::new(async_r); - // handle decompression, by wrapping the reader. - let sync_r: Box<dyn BufRead + Send> = match narinfo.compression { - Some("none") => Box::new(sync_r), - Some("xz") => Box::new(io::BufReader::new(xz2::read::XzDecoder::new(sync_r))), - Some(comp) => { - return Err(Error::InvalidRequest( - format!("unsupported compression: {}", comp).to_string(), - )) - } - None => { - return Err(Error::InvalidRequest( - "unsupported compression: bzip2".to_string(), - )) + // handle decompression, depending on the compression field. + let r: Box<dyn AsyncRead + Send + Unpin> = match narinfo.compression { + Some("none") => Box::new(r) as Box<dyn AsyncRead + Send + Unpin>, + Some("bzip2") | None => Box::new(async_compression::tokio::bufread::BzDecoder::new(r)) + as Box<dyn AsyncRead + Send + Unpin>, + Some("gzip") => Box::new(async_compression::tokio::bufread::GzipDecoder::new(r)) + as Box<dyn AsyncRead + Send + Unpin>, + Some("xz") => Box::new(async_compression::tokio::bufread::XzDecoder::new(r)) + as Box<dyn AsyncRead + Send + Unpin>, + Some("zstd") => Box::new(async_compression::tokio::bufread::ZstdDecoder::new(r)) + as Box<dyn AsyncRead + Send + Unpin>, + Some(comp_str) => { + return Err(Error::StorageError(format!( + "unsupported compression: {comp_str}" + ))); } }; - - let res = tokio::task::spawn_blocking({ - let blob_service = self.blob_service.clone(); - let directory_service = self.directory_service.clone(); - move || -> io::Result<_> { - // Wrap the reader once more, so we can calculate NarSize and NarHash - let mut sync_r = io::BufReader::new(NarReader::from(sync_r)); - let root_node = crate::nar::read_nar(&mut sync_r, blob_service, directory_service)?; - - let (_, nar_hash, nar_size) = sync_r.into_inner().into_inner(); - - Ok((root_node, nar_hash, nar_size)) - } - }) + let mut nar_hash = sha2::Sha256::new(); + let mut nar_size = 0; + + // Assemble NarHash and NarSize as we read bytes. + let r = InspectReader::new(r, |b| { + nar_size += b.len() as u64; + nar_hash.write_all(b).unwrap(); + }); + + // HACK: InspectReader doesn't implement AsyncBufRead, but neither do our decompressors. + let mut r = BufReader::new(r); + + let root_node = crate::nar::ingest_nar( + self.blob_service.clone(), + self.directory_service.clone(), + &mut r, + ) .await - .unwrap(); - - match res { - Ok((root_node, nar_hash, nar_size)) => { - // ensure the ingested narhash and narsize do actually match. 
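The InspectReader above replaces the hand-rolled NarReader deleted at the end of this file: it taps each chunk as it streams past, so NarHash and NarSize are accumulated in the same pass that feeds ingest_nar. The trick in isolation (a sketch; just the tap, no NAR parsing):

    use sha2::{Digest, Sha256};
    use tokio_util::io::InspectReader;

    async fn hash_while_reading(
        r: impl tokio::io::AsyncRead + Unpin,
    ) -> std::io::Result<([u8; 32], u64)> {
        let mut hasher = Sha256::new();
        let mut size = 0u64;
        {
            // The closure observes every freshly read chunk.
            let mut r = InspectReader::new(r, |chunk| {
                size += chunk.len() as u64;
                hasher.update(chunk);
            });
            tokio::io::copy(&mut r, &mut tokio::io::sink()).await?;
        }
        Ok((hasher.finalize().into(), size))
    }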
- if narinfo.nar_size != nar_size { - warn!( - narinfo.nar_size = narinfo.nar_size, - http.nar_size = nar_size, - "NARSize mismatch" - ); - Err(io::Error::new( - io::ErrorKind::InvalidData, - "NarSize mismatch".to_string(), - ))?; - } - if narinfo.nar_hash != nar_hash { - warn!( - narinfo.nar_hash = %NixHash::Sha256(narinfo.nar_hash), - http.nar_hash = %NixHash::Sha256(nar_hash), - "NarHash mismatch" - ); - Err(io::Error::new( - io::ErrorKind::InvalidData, - "NarHash mismatch".to_string(), - ))?; - } - - Ok(Some(PathInfo { - node: Some(castorepb::Node { - // set the name of the root node to the digest-name of the store path. - node: Some( - root_node.rename(narinfo.store_path.to_string().to_owned().into()), - ), - }), - references: pathinfo.references, - narinfo: pathinfo.narinfo, - })) - } - Err(e) => Err(e.into()), + .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?; + + // ensure the ingested narhash and narsize do actually match. + if narinfo.nar_size != nar_size { + warn!( + narinfo.nar_size = narinfo.nar_size, + http.nar_size = nar_size, + "NarSize mismatch" + ); + Err(io::Error::new( + io::ErrorKind::InvalidData, + "NarSize mismatch".to_string(), + ))?; + } + let nar_hash: [u8; 32] = nar_hash.finalize().into(); + if narinfo.nar_hash != nar_hash { + warn!( + narinfo.nar_hash = %NixHash::Sha256(narinfo.nar_hash), + http.nar_hash = %NixHash::Sha256(nar_hash), + "NarHash mismatch" + ); + Err(io::Error::new( + io::ErrorKind::InvalidData, + "NarHash mismatch".to_string(), + ))?; } + + Ok(Some(PathInfo { + node: Some(castorepb::Node { + // set the name of the root node to the digest-name of the store path. + node: Some(root_node.rename(narinfo.store_path.to_string().to_owned().into())), + }), + references: pathinfo.references, + narinfo: pathinfo.narinfo, + })) } #[instrument(skip_all, fields(path_info=?_path_info))] @@ -259,16 +256,6 @@ where )) } - #[instrument(skip_all, fields(root_node=?root_node))] - async fn calculate_nar( - &self, - root_node: &castorepb::node::Node, - ) -> Result<(u64, [u8; 32]), Error> { - Err(Error::InvalidRequest( - "calculate_nar not supported for this backend".to_string(), - )) - } - fn list(&self) -> BoxStream<'static, Result<PathInfo, Error>> { Box::pin(futures::stream::once(async { Err(Error::InvalidRequest( @@ -277,38 +264,3 @@ where })) } } - -/// Small helper reader implementing [std::io::Read]. -/// It can be used to wrap another reader, counts the number of bytes read -/// and the sha256 digest of the contents. -struct NarReader<R: Read> { - r: R, - - sha256: sha2::Sha256, - bytes_read: u64, -} - -impl<R: Read> NarReader<R> { - pub fn from(inner: R) -> Self { - Self { - r: inner, - sha256: Sha256::new(), - bytes_read: 0, - } - } - - /// Returns the (remaining) inner reader, the sha256 digest and the number of bytes read. 
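For reference, the decompression dispatch above now covers every codec async-compression is built with in this change's Cargo.toml (bzip2, gzip, xz, zstd), and an absent Compression field is treated as bzip2. The same dispatch restructured as a standalone helper (a sketch, not depot code):

    use async_compression::tokio::bufread::{BzDecoder, GzipDecoder, XzDecoder, ZstdDecoder};
    use tokio::io::{AsyncBufRead, AsyncRead};

    fn decompress<'a, R: AsyncBufRead + Send + Unpin + 'a>(
        r: R,
        compression: Option<&str>,
    ) -> std::io::Result<Box<dyn AsyncRead + Send + Unpin + 'a>> {
        Ok(match compression {
            Some("none") => Box::new(r),
            Some("bzip2") | None => Box::new(BzDecoder::new(r)), // default is bzip2
            Some("gzip") => Box::new(GzipDecoder::new(r)),
            Some("xz") => Box::new(XzDecoder::new(r)),
            Some("zstd") => Box::new(ZstdDecoder::new(r)),
            Some(other) => {
                return Err(std::io::Error::new(
                    std::io::ErrorKind::Unsupported,
                    format!("unsupported compression: {other}"),
                ))
            }
        })
    }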
- pub fn into_inner(self) -> (R, [u8; 32], u64) { - (self.r, self.sha256.finalize_fixed().into(), self.bytes_read) - } -} - -impl<R: Read> Read for NarReader<R> { - fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> { - self.r.read(buf).map(|n| { - self.bytes_read += n as u64; - self.sha256.write_all(&buf[..n]).unwrap(); - n - }) - } -} diff --git a/tvix/store/src/pathinfoservice/sled.rs b/tvix/store/src/pathinfoservice/sled.rs index 7b6d7fd7ab..eb3cf2ff1b 100644 --- a/tvix/store/src/pathinfoservice/sled.rs +++ b/tvix/store/src/pathinfoservice/sled.rs @@ -1,140 +1,117 @@ use super::PathInfoService; -use crate::nar::calculate_size_and_sha256; use crate::proto::PathInfo; -use futures::stream::iter; +use async_stream::try_stream; use futures::stream::BoxStream; +use nix_compat::nixbase32; use prost::Message; use std::path::Path; use tonic::async_trait; +use tracing::instrument; use tracing::warn; -use tvix_castore::proto as castorepb; -use tvix_castore::{blobservice::BlobService, directoryservice::DirectoryService, Error}; +use tvix_castore::Error; /// SledPathInfoService stores PathInfo in a [sled](https://github.com/spacejam/sled). /// /// The PathInfo messages are stored as encoded protos, and keyed by their output hash, /// as that's currently the only request type available. -pub struct SledPathInfoService<BS, DS> { +pub struct SledPathInfoService { db: sled::Db, - - blob_service: BS, - directory_service: DS, } -impl<BS, DS> SledPathInfoService<BS, DS> { - pub fn new<P: AsRef<Path>>( - p: P, - blob_service: BS, - directory_service: DS, - ) -> Result<Self, sled::Error> { +impl SledPathInfoService { + pub fn new<P: AsRef<Path>>(p: P) -> Result<Self, sled::Error> { let config = sled::Config::default() .use_compression(false) // is a required parameter .path(p); let db = config.open()?; - Ok(Self { - db, - blob_service, - directory_service, - }) + Ok(Self { db }) } - pub fn new_temporary(blob_service: BS, directory_service: DS) -> Result<Self, sled::Error> { + pub fn new_temporary() -> Result<Self, sled::Error> { let config = sled::Config::default().temporary(true); let db = config.open()?; - Ok(Self { - db, - blob_service, - directory_service, - }) + Ok(Self { db }) } } #[async_trait] -impl<BS, DS> PathInfoService for SledPathInfoService<BS, DS> -where - BS: AsRef<dyn BlobService> + Send + Sync, - DS: AsRef<dyn DirectoryService> + Send + Sync, -{ +impl PathInfoService for SledPathInfoService { + #[instrument(level = "trace", skip_all, fields(path_info.digest = nixbase32::encode(&digest)))] async fn get(&self, digest: [u8; 20]) -> Result<Option<PathInfo>, Error> { - match self.db.get(digest) { - Ok(None) => Ok(None), - Ok(Some(data)) => match PathInfo::decode(&*data) { - Ok(path_info) => Ok(Some(path_info)), - Err(e) => { + let resp = tokio::task::spawn_blocking({ + let db = self.db.clone(); + move || db.get(digest.as_slice()) + }) + .await? 
+ .map_err(|e| { + warn!("failed to retrieve PathInfo: {}", e); + Error::StorageError(format!("failed to retrieve PathInfo: {}", e)) + })?; + match resp { + None => Ok(None), + Some(data) => { + let path_info = PathInfo::decode(&*data).map_err(|e| { warn!("failed to decode stored PathInfo: {}", e); - Err(Error::StorageError(format!( - "failed to decode stored PathInfo: {}", - e - ))) - } - }, - Err(e) => { - warn!("failed to retrieve PathInfo: {}", e); - Err(Error::StorageError(format!( - "failed to retrieve PathInfo: {}", - e - ))) + Error::StorageError(format!("failed to decode stored PathInfo: {}", e)) + })?; + Ok(Some(path_info)) } } } + #[instrument(level = "trace", skip_all, fields(path_info.root_node = ?path_info.node))] async fn put(&self, path_info: PathInfo) -> Result<PathInfo, Error> { // Call validate on the received PathInfo message. - match path_info.validate() { - Err(e) => Err(Error::InvalidRequest(format!( - "failed to validate PathInfo: {}", - e - ))), - // In case the PathInfo is valid, and we were able to extract a NixPath, store it in the database. - // This overwrites existing PathInfo objects. - Ok(nix_path) => match self - .db - .insert(*nix_path.digest(), path_info.encode_to_vec()) - { - Ok(_) => Ok(path_info), - Err(e) => { - warn!("failed to insert PathInfo: {}", e); - Err(Error::StorageError(format! { - "failed to insert PathInfo: {}", e - })) - } - }, - } - } + let store_path = path_info + .validate() + .map_err(|e| Error::InvalidRequest(format!("failed to validate PathInfo: {}", e)))?; + + // In case the PathInfo is valid, we were able to parse a StorePath. + // Store it in the database, keyed by its digest. + // This overwrites existing PathInfo objects. + tokio::task::spawn_blocking({ + let db = self.db.clone(); + let k = *store_path.digest(); + let data = path_info.encode_to_vec(); + move || db.insert(k, data) + }) + .await? + .map_err(|e| { + warn!("failed to insert PathInfo: {}", e); + Error::StorageError(format! { + "failed to insert PathInfo: {}", e + }) + })?; - async fn calculate_nar( - &self, - root_node: &castorepb::node::Node, - ) -> Result<(u64, [u8; 32]), Error> { - calculate_size_and_sha256(root_node, &self.blob_service, &self.directory_service) - .await - .map_err(|e| Error::StorageError(e.to_string())) + Ok(path_info) } fn list(&self) -> BoxStream<'static, Result<PathInfo, Error>> { - Box::pin(iter(self.db.iter().values().map(|v| match v { - Ok(data) => { - // we retrieved some bytes - match PathInfo::decode(&*data) { - Ok(path_info) => Ok(path_info), - Err(e) => { - warn!("failed to decode stored PathInfo: {}", e); - Err(Error::StorageError(format!( - "failed to decode stored PathInfo: {}", - e - ))) - } - } - } - Err(e) => { - warn!("failed to retrieve PathInfo: {}", e); - Err(Error::StorageError(format!( - "failed to retrieve PathInfo: {}", - e - ))) + let db = self.db.clone(); + let mut it = db.iter().values(); + + Box::pin(try_stream! { + // Don't block the executor while waiting for .next(), so wrap that + // in a spawn_blocking call. + // We need to pass it around to be able to reuse it. + while let (Some(elem), new_it) = tokio::task::spawn_blocking(move || { + (it.next(), it) + }).await?
{ it = new_it; + let data = elem.map_err(|e| { + warn!("failed to retrieve PathInfo: {}", e); + Error::StorageError(format!("failed to retrieve PathInfo: {}", e)) + })?; + + let path_info = PathInfo::decode(&*data).map_err(|e| { + warn!("failed to decode stored PathInfo: {}", e); + Error::StorageError(format!("failed to decode stored PathInfo: {}", e)) + })?; + + yield path_info } - }))) + }) } } diff --git a/tvix/store/src/pathinfoservice/tests/mod.rs b/tvix/store/src/pathinfoservice/tests/mod.rs index c9b9a06377..26166d1b75 100644 --- a/tvix/store/src/pathinfoservice/tests/mod.rs +++ b/tvix/store/src/pathinfoservice/tests/mod.rs @@ -13,7 +13,7 @@ use crate::proto::PathInfo; use crate::tests::fixtures::DUMMY_PATH_DIGEST; mod utils; -use self::utils::make_grpc_path_info_service_client; +pub use self::utils::make_grpc_path_info_service_client; /// Convenience type alias batching all three services together. #[allow(clippy::upper_case_acronyms)] @@ -51,7 +51,7 @@ pub async fn make_path_info_service(uri: &str) -> BSDSPS { #[case::memory(make_path_info_service("memory://").await)] #[case::grpc(make_grpc_path_info_service_client().await)] #[case::sled(make_path_info_service("sled://").await)] -#[cfg_attr(feature = "cloud", case::bigtable(make_path_info_service("bigtable://instance-1?project_id=project-1&table_name=table-1&family_name=cf1").await))] +#[cfg_attr(all(feature = "cloud",feature="integration"), case::bigtable(make_path_info_service("bigtable://instance-1?project_id=project-1&table_name=table-1&family_name=cf1").await))] pub fn path_info_services( #[case] services: ( impl BlobService, diff --git a/tvix/store/src/pathinfoservice/tests/utils.rs b/tvix/store/src/pathinfoservice/tests/utils.rs index 31ec57aade..30c5902b61 100644 --- a/tvix/store/src/pathinfoservice/tests/utils.rs +++ b/tvix/store/src/pathinfoservice/tests/utils.rs @@ -3,6 +3,7 @@ use std::sync::Arc; use tonic::transport::{Endpoint, Server, Uri}; use crate::{ + nar::{NarCalculationService, SimpleRenderer}, pathinfoservice::{GRPCPathInfoService, MemoryPathInfoService, PathInfoService}, proto::{ path_info_service_client::PathInfoServiceClient, @@ -26,12 +27,15 @@ pub async fn make_grpc_path_info_service_client() -> super::BSDSPS { let directory_service = directory_service.clone(); async move { let path_info_service: Arc<dyn PathInfoService> = - Arc::from(MemoryPathInfoService::new(blob_service, directory_service)); + Arc::from(MemoryPathInfoService::default()); + let nar_calculation_service = + Box::new(SimpleRenderer::new(blob_service, directory_service)) + as Box<dyn NarCalculationService>; - // spin up a new DirectoryService + // spin up a new PathInfoService let mut server = Server::builder(); let router = server.add_service(PathInfoServiceServer::new( - GRPCPathInfoServiceWrapper::new(path_info_service), + GRPCPathInfoServiceWrapper::new(path_info_service, nar_calculation_service), )); router diff --git a/tvix/store/src/proto/grpc_pathinfoservice_wrapper.rs b/tvix/store/src/proto/grpc_pathinfoservice_wrapper.rs index 9f45818227..68f5575676 100644 --- a/tvix/store/src/proto/grpc_pathinfoservice_wrapper.rs +++ b/tvix/store/src/proto/grpc_pathinfoservice_wrapper.rs @@ -1,4 +1,4 @@ -use crate::nar::RenderError; +use crate::nar::{NarCalculationService, RenderError}; use crate::pathinfoservice::PathInfoService; use crate::proto; use futures::{stream::BoxStream, TryStreamExt}; @@ -7,23 +7,26 @@ use tonic::{async_trait, Request, Response, Result, Status}; use tracing::{instrument, warn}; use tvix_castore::proto as castorepb;
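
The sled list() rewrite above never blocks the executor on disk I/O: every call to .next() happens inside spawn_blocking, and the iterator is moved into the blocking task and handed back together with the item so it can be reused for the next step. The same trick in isolation, as a sketch; blocking_iter_to_stream is a made-up name and io::Error stands in for the real error type:

use async_stream::try_stream;
use futures::stream::BoxStream;
use std::io;

// Drive a blocking iterator from async code. The iterator is moved into
// each spawn_blocking call and returned alongside the item, so ownership
// ping-pongs between the stream and the blocking task.
fn blocking_iter_to_stream<I, T>(mut it: I) -> BoxStream<'static, io::Result<T>>
where
    I: Iterator<Item = T> + Send + 'static,
    T: Send + 'static,
{
    Box::pin(try_stream! {
        loop {
            let (elem, new_it) = tokio::task::spawn_blocking(move || (it.next(), it))
                .await
                .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
            it = new_it;
            let item = match elem {
                Some(item) => item,
                None => break, // iterator exhausted, end the stream
            };
            yield item;
        }
    })
}

The one-shot get and put calls above use the same spawn_blocking pattern, just without the hand-back loop.
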
-pub struct GRPCPathInfoServiceWrapper<PS> { - inner: PS, +pub struct GRPCPathInfoServiceWrapper<PS, NS> { + path_info_service: PS, // FUTUREWORK: allow exposing without allowing listing + nar_calculation_service: NS, } -impl<PS> GRPCPathInfoServiceWrapper<PS> { - pub fn new(path_info_service: PS) -> Self { +impl<PS, NS> GRPCPathInfoServiceWrapper<PS, NS> { + pub fn new(path_info_service: PS, nar_calculation_service: NS) -> Self { Self { - inner: path_info_service, + path_info_service, + nar_calculation_service, } } } #[async_trait] -impl<PS> proto::path_info_service_server::PathInfoService for GRPCPathInfoServiceWrapper<PS> +impl<PS, NS> proto::path_info_service_server::PathInfoService for GRPCPathInfoServiceWrapper<PS, NS> where PS: Deref<Target = dyn PathInfoService> + Send + Sync + 'static, + NS: NarCalculationService + Send + Sync + 'static, { type ListStream = BoxStream<'static, tonic::Result<proto::PathInfo, Status>>; @@ -39,7 +42,7 @@ where .to_vec() .try_into() .map_err(|_e| Status::invalid_argument("invalid output digest length"))?; - match self.inner.get(digest).await { + match self.path_info_service.get(digest).await { Ok(None) => Err(Status::not_found("PathInfo not found")), Ok(Some(path_info)) => Ok(Response::new(path_info)), Err(e) => { @@ -57,7 +60,7 @@ where // Store the PathInfo in the client. Clients MUST validate the data // they receive, so we don't validate additionally here. - match self.inner.put(path_info).await { + match self.path_info_service.put(path_info).await { Ok(path_info_new) => Ok(Response::new(path_info_new)), Err(e) => { warn!(err = %e, "failed to put PathInfo"); @@ -79,7 +82,7 @@ where Err(Status::invalid_argument("invalid root node"))? } - match self.inner.calculate_nar(&root_node).await { + match self.nar_calculation_service.calculate_nar(&root_node).await { Ok((nar_size, nar_sha256)) => Ok(Response::new(proto::CalculateNarResponse { nar_size, nar_sha256: nar_sha256.to_vec().into(), @@ -99,7 +102,7 @@ where _request: Request<proto::ListPathInfoRequest>, ) -> Result<Response<Self::ListStream>, Status> { let stream = Box::pin( - self.inner + self.path_info_service .list() .map_err(|e| Status::internal(e.to_string())), ); diff --git a/tvix/store/src/utils.rs b/tvix/store/src/utils.rs index 0b171377bd..e6e42f6ec4 100644 --- a/tvix/store/src/utils.rs +++ b/tvix/store/src/utils.rs @@ -10,9 +10,10 @@ use tvix_castore::{ directoryservice::{self, DirectoryService}, }; +use crate::nar::{NarCalculationService, SimpleRenderer}; use crate::pathinfoservice::{self, PathInfoService}; -/// Construct the three store handles from their addrs. +/// Construct the store handles from their addrs. pub async fn construct_services( blob_service_addr: impl AsRef<str>, directory_service_addr: impl AsRef<str>, @@ -21,6 +22,7 @@ pub async fn construct_services( Arc<dyn BlobService>, Arc<dyn DirectoryService>, Box<dyn PathInfoService>, + Box<dyn NarCalculationService>, )> { let blob_service: Arc<dyn BlobService> = blobservice::from_addr(blob_service_addr.as_ref()) .await? @@ -36,7 +38,18 @@ pub async fn construct_services( ) .await?; - Ok((blob_service, directory_service, path_info_service)) + // TODO: grpc client also implements NarCalculationService + let nar_calculation_service = Box::new(SimpleRenderer::new( + blob_service.clone(), + directory_service.clone(), + )) as Box<dyn NarCalculationService>; + + Ok(( + blob_service, + directory_service, + path_info_service, + nar_calculation_service, + )) } /// The inverse of [tokio_util::io::SyncIoBridge]. 
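
Taken together, the wrapper and construct_services changes above finish splitting NAR calculation out of PathInfoService: a store no longer needs blob and directory handles just to satisfy calculate_nar. A rough self-contained sketch of the resulting shape, using tonic's async_trait re-export as the diff does; the trait methods are simplified assumptions, and Node and Error are stand-ins for the real tvix types:

use tonic::async_trait;

// Stand-ins for the real tvix types.
pub struct Node;
pub struct Error(pub String);

#[async_trait]
pub trait PathInfoService: Send + Sync {
    async fn get(&self, digest: [u8; 20]) -> Result<Option<Vec<u8>>, Error>;
}

#[async_trait]
pub trait NarCalculationService: Send + Sync {
    async fn calculate_nar(&self, root_node: &Node) -> Result<(u64, [u8; 32]), Error>;
}

// The gRPC wrapper owns one of each capability, so storage backends that
// cannot render NARs (like the slimmed-down sled store) stay simple.
pub struct GrpcPathInfoServiceWrapper<PS, NS> {
    path_info_service: PS,
    nar_calculation_service: NS,
}

impl<PS, NS> GrpcPathInfoServiceWrapper<PS, NS> {
    pub fn new(path_info_service: PS, nar_calculation_service: NS) -> Self {
        Self {
            path_info_service,
            nar_calculation_service,
        }
    }
}

The TODO in construct_services hints at the next step: the gRPC client itself implements NarCalculationService, so a remote store would not need a local SimpleRenderer.
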
diff --git a/tvix/tools/crunch-v2/Cargo.lock b/tvix/tools/crunch-v2/Cargo.lock index cff5509d0b..3748d7e4e9 100644 --- a/tvix/tools/crunch-v2/Cargo.lock +++ b/tvix/tools/crunch-v2/Cargo.lock @@ -752,6 +752,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" [[package]] +name = "enum-primitive-derive" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba7795da175654fe16979af73f81f26a8ea27638d8d9823d317016888a63dc4c" +dependencies = [ + "num-traits", + "quote", + "syn 2.0.39", +] + +[[package]] name = "enum_dispatch" version = "0.3.12" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -1394,12 +1405,16 @@ dependencies = [ "data-encoding", "ed25519", "ed25519-dalek", + "enum-primitive-derive", "glob", "nom", + "num-traits", + "pin-project-lite", "serde", "serde_json", "sha2 0.10.8", "thiserror", + "tokio", ] [[package]] @@ -1432,9 +1447,9 @@ dependencies = [ [[package]] name = "num-traits" -version = "0.2.17" +version = "0.2.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39e3200413f237f41ab11ad6d161bc7239c84dcb631773ccd7de3dfe4b5c267c" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ "autocfg", "libm", @@ -2682,9 +2697,9 @@ dependencies = [ [[package]] name = "tokio" -version = "1.34.0" +version = "1.37.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0c014766411e834f7af5b8f4cf46257aab4036ca95e9d2c144a10f59ad6f5b9" +checksum = "1adbebffeca75fcfd058afa480fb6c0b81e165a0323f9c9d39c9697e37c46787" dependencies = [ "backtrace", "bytes", diff --git a/tvix/tools/crunch-v2/Cargo.toml b/tvix/tools/crunch-v2/Cargo.toml index 1e3f025250..d2b7126bd2 100644 --- a/tvix/tools/crunch-v2/Cargo.toml +++ b/tvix/tools/crunch-v2/Cargo.toml @@ -14,7 +14,7 @@ bstr = "1.8.0" bytes = "1.5.0" futures = "0.3.29" -tokio = { version = "1.34.0", features = ["full"] } +tokio = { version = "1.37.0", features = ["full"] } rusoto_core = { version = "0.48.0", default-features = false, features = ["hyper-rustls"] } rusoto_s3 = { version = "0.48.0", default-features = false, features = ["rustls"] } diff --git a/tvix/tools/crunch-v2/src/main.rs b/tvix/tools/crunch-v2/src/main.rs index a5d538f6be..5be8c28e29 100644 --- a/tvix/tools/crunch-v2/src/main.rs +++ b/tvix/tools/crunch-v2/src/main.rs @@ -147,7 +147,7 @@ fn ingest(node: nar::Node, name: Vec<u8>, avg_chunk_size: u32) -> Result<proto:: let mut symlinks = vec![]; while let Some(node) = reader.next()? { - match ingest(node.node, node.name, avg_chunk_size)? { + match ingest(node.node, node.name.to_owned(), avg_chunk_size)? 
{ proto::path::Node::Directory(node) => { directories.push(node); } diff --git a/tvix/tools/narinfo2parquet/Cargo.lock b/tvix/tools/narinfo2parquet/Cargo.lock index e59f70732d..070a468510 100644 --- a/tvix/tools/narinfo2parquet/Cargo.lock +++ b/tvix/tools/narinfo2parquet/Cargo.lock @@ -487,6 +487,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" [[package]] +name = "enum-primitive-derive" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba7795da175654fe16979af73f81f26a8ea27638d8d9823d317016888a63dc4c" +dependencies = [ + "num-traits", + "quote", + "syn 2.0.39", +] + +[[package]] name = "enum_dispatch" version = "0.3.12" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -878,9 +889,9 @@ dependencies = [ [[package]] name = "mio" -version = "0.8.9" +version = "0.8.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3dce281c5e46beae905d4de1870d8b1509a9142b62eedf18b443b011ca8343d0" +checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c" dependencies = [ "libc", "wasi", @@ -930,12 +941,16 @@ dependencies = [ "data-encoding", "ed25519", "ed25519-dalek", + "enum-primitive-derive", "glob", "nom", + "num-traits", + "pin-project-lite", "serde", "serde_json", "sha2", "thiserror", + "tokio", ] [[package]] @@ -968,9 +983,9 @@ dependencies = [ [[package]] name = "num-traits" -version = "0.2.17" +version = "0.2.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39e3200413f237f41ab11ad6d161bc7239c84dcb631773ccd7de3dfe4b5c267c" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ "autocfg", "libm", @@ -1805,9 +1820,9 @@ dependencies = [ [[package]] name = "tokio" -version = "1.33.0" +version = "1.37.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f38200e3ef7995e5ef13baec2f432a6da0aa9ac495b2c0e8f3b7eec2c92d653" +checksum = "1adbebffeca75fcfd058afa480fb6c0b81e165a0323f9c9d39c9697e37c46787" dependencies = [ "backtrace", "bytes", @@ -1816,10 +1831,22 @@ dependencies = [ "num_cpus", "pin-project-lite", "socket2", + "tokio-macros", "windows-sys", ] [[package]] +name = "tokio-macros" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.39", +] + +[[package]] name = "tokio-util" version = "0.7.10" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -2091,18 +2118,18 @@ checksum = "9828b178da53440fa9c766a3d2f73f7cf5d0ac1fe3980c1e5018d899fd19e07b" [[package]] name = "zerocopy" -version = "0.7.25" +version = "0.7.34" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8cd369a67c0edfef15010f980c3cbe45d7f651deac2cd67ce097cd801de16557" +checksum = "ae87e3fcd617500e5d106f0380cf7b77f3c6092aae37191433159dda23cfb087" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.7.25" +version = "0.7.34" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2f140bda219a26ccc0cdb03dba58af72590c53b22642577d88a927bc5c87d6b" +checksum = "15e934569e47891f7d9411f1a451d947a60e000ab3bd24fbb970f000387d1b3b" dependencies = [ "proc-macro2", "quote", diff --git a/tvix/tools/narinfo2parquet/Cargo.nix b/tvix/tools/narinfo2parquet/Cargo.nix index 
27a5d684b6..0ae873c844 100644 --- a/tvix/tools/narinfo2parquet/Cargo.nix +++ b/tvix/tools/narinfo2parquet/Cargo.nix @@ -1,4 +1,4 @@ -# This file was @generated by crate2nix 0.13.0 with the command: +# This file was @generated by crate2nix 0.14.0 with the command: # "generate" "--all-features" # See https://github.com/kolloch/crate2nix for more info. @@ -1429,6 +1429,32 @@ rec { }; resolvedDefaultFeatures = [ "default" "use_std" ]; }; + "enum-primitive-derive" = rec { + crateName = "enum-primitive-derive"; + version = "0.3.0"; + edition = "2018"; + sha256 = "0k6wcf58h5kh64yq5nfq71va53kaya0kzxwsjwbgwm2n2zd9axxs"; + procMacro = true; + authors = [ + "Doug Goldstein <cardoe@cardoe.com>" + ]; + dependencies = [ + { + name = "num-traits"; + packageId = "num-traits"; + usesDefaultFeatures = false; + } + { + name = "quote"; + packageId = "quote"; + } + { + name = "syn"; + packageId = "syn 2.0.39"; + } + ]; + + }; "enum_dispatch" = rec { crateName = "enum_dispatch"; version = "0.3.12"; @@ -2511,9 +2537,9 @@ rec { }; "mio" = rec { crateName = "mio"; - version = "0.8.9"; + version = "0.8.11"; edition = "2018"; - sha256 = "1l23hg513c23nhcdzvk25caaj28mic6qgqadbn8axgj6bqf2ikix"; + sha256 = "034byyl0ardml5yliy1hmvx8arkmn9rv479pid794sm07ia519m4"; authors = [ "Carl Lerche <me@carllerche.com>" "Thomas de Zeeuw <thomasdezeeuw@gmail.com>" @@ -2689,6 +2715,10 @@ rec { packageId = "ed25519-dalek"; } { + name = "enum-primitive-derive"; + packageId = "enum-primitive-derive"; + } + { name = "glob"; packageId = "glob"; } @@ -2697,6 +2727,15 @@ rec { packageId = "nom"; } { + name = "num-traits"; + packageId = "num-traits"; + } + { + name = "pin-project-lite"; + packageId = "pin-project-lite"; + optional = true; + } + { name = "serde"; packageId = "serde"; features = [ "derive" ]; @@ -2713,6 +2752,12 @@ rec { name = "thiserror"; packageId = "thiserror"; } + { + name = "tokio"; + packageId = "tokio"; + optional = true; + features = [ "io-util" "macros" ]; + } ]; devDependencies = [ { @@ -2721,9 +2766,13 @@ rec { } ]; features = { - "async" = [ "futures-util" ]; - "futures-util" = [ "dep:futures-util" ]; + "async" = [ "tokio" ]; + "default" = [ "async" "wire" ]; + "pin-project-lite" = [ "dep:pin-project-lite" ]; + "tokio" = [ "dep:tokio" ]; + "wire" = [ "tokio" "pin-project-lite" ]; }; + resolvedDefaultFeatures = [ "async" "default" "pin-project-lite" "tokio" "wire" ]; }; "nom" = rec { crateName = "nom"; @@ -2792,9 +2841,9 @@ rec { }; "num-traits" = rec { crateName = "num-traits"; - version = "0.2.17"; - edition = "2018"; - sha256 = "0z16bi5zwgfysz6765v3rd6whfbjpihx3mhsn4dg8dzj2c221qrr"; + version = "0.2.19"; + edition = "2021"; + sha256 = "0h984rhdkkqd4ny9cif7y2azl3xdfb7768hb9irhpsch4q3gq787"; authors = [ "The Rust Project Developers" ]; @@ -6026,9 +6075,9 @@ rec { }; "tokio" = rec { crateName = "tokio"; - version = "1.33.0"; + version = "1.37.0"; edition = "2021"; - sha256 = "0lynj8nfqziviw72qns9mjlhmnm66bsc5bivy5g5x6gp7q720f2g"; + sha256 = "11v7qhvpwsf976frqgrjl1jy308bdkxq195gb38cypx7xkzypnqs"; authors = [ "Tokio Contributors <team@tokio.rs>" ]; @@ -6072,6 +6121,11 @@ rec { features = [ "all" ]; } { + name = "tokio-macros"; + packageId = "tokio-macros"; + optional = true; + } + { name = "windows-sys"; packageId = "windows-sys"; optional = true; @@ -6116,7 +6170,33 @@ rec { "tracing" = [ "dep:tracing" ]; "windows-sys" = [ "dep:windows-sys" ]; }; - resolvedDefaultFeatures = [ "bytes" "default" "io-util" "libc" "mio" "net" "num_cpus" "rt" "rt-multi-thread" "socket2" "sync" "time" "windows-sys" ]; + 
resolvedDefaultFeatures = [ "bytes" "default" "io-util" "libc" "macros" "mio" "net" "num_cpus" "rt" "rt-multi-thread" "socket2" "sync" "time" "tokio-macros" "windows-sys" ]; + }; + "tokio-macros" = rec { + crateName = "tokio-macros"; + version = "2.2.0"; + edition = "2021"; + sha256 = "0fwjy4vdx1h9pi4g2nml72wi0fr27b5m954p13ji9anyy8l1x2jv"; + procMacro = true; + authors = [ + "Tokio Contributors <team@tokio.rs>" + ]; + dependencies = [ + { + name = "proc-macro2"; + packageId = "proc-macro2"; + } + { + name = "quote"; + packageId = "quote"; + } + { + name = "syn"; + packageId = "syn 2.0.39"; + features = [ "full" ]; + } + ]; + }; "tokio-util" = rec { crateName = "tokio-util"; @@ -7673,9 +7753,9 @@ rec { }; "zerocopy" = rec { crateName = "zerocopy"; - version = "0.7.25"; + version = "0.7.34"; edition = "2018"; - sha256 = "0mv5w4fq1kcpw1ydcb5cvr8zdms5pqy0r60g04ayzpqfgjk6klwc"; + sha256 = "11xhrwixm78m6ca1jdxf584wdwvpgg7q00vg21fhwl0psvyf71xf"; authors = [ "Joshua Liebow-Feeser <joshlf@google.com>" ]; @@ -7709,9 +7789,9 @@ rec { }; "zerocopy-derive" = rec { crateName = "zerocopy-derive"; - version = "0.7.25"; + version = "0.7.34"; edition = "2018"; - sha256 = "0svxr32pp4lav1vjar127g2r09gpiajxn0yv1k66r8hrlayl1wf2"; + sha256 = "0fqvglw01w3hp7xj9gdk1800x9j7v58s9w8ijiyiz2a7krb39s8m"; procMacro = true; authors = [ "Joshua Liebow-Feeser <joshlf@google.com>" @@ -7969,8 +8049,9 @@ rec { # because we compiled those test binaries in the former and not the latter. # So all paths will expect source tree to be there and not in the build top directly. # For example: $NIX_BUILD_TOP := /build in general, if you ask yourself. - # TODO(raitobezarius): I believe there could be more edge cases if `crate.sourceRoot` - # do exist but it's very hard to reason about them, so let's wait until the first bug report. + # NOTE: There could be edge cases if `crate.sourceRoot` does exist but + # it's very hard to reason about them. + # Open a bug if you run into this! mkdir -p source/ cd source/ diff --git a/users/Profpatsch/my-prelude/default.nix b/users/Profpatsch/my-prelude/default.nix index e445115416..4bca8ea49f 100644 --- a/users/Profpatsch/my-prelude/default.nix +++ b/users/Profpatsch/my-prelude/default.nix @@ -7,6 +7,7 @@ pkgs.haskellPackages.mkDerivation { src = depot.users.Profpatsch.exactSource ./. [ ./my-prelude.cabal ./src/Aeson.hs + ./src/Arg.hs ./src/AtLeast.hs ./src/MyPrelude.hs ./src/Test.hs diff --git a/users/Profpatsch/my-prelude/my-prelude.cabal b/users/Profpatsch/my-prelude/my-prelude.cabal index 95a8399f37..2f7882a526 100644 --- a/users/Profpatsch/my-prelude/my-prelude.cabal +++ b/users/Profpatsch/my-prelude/my-prelude.cabal @@ -59,6 +59,7 @@ library exposed-modules: MyPrelude Aeson + Arg AtLeast Test Postgres.Decoder diff --git a/users/Profpatsch/my-prelude/src/Arg.hs b/users/Profpatsch/my-prelude/src/Arg.hs new file mode 100644 index 0000000000..a6ffa90924 --- /dev/null +++ b/users/Profpatsch/my-prelude/src/Arg.hs @@ -0,0 +1,34 @@ +module Arg where + +import Data.String (IsString) +import GHC.Exts (IsList) +import GHC.TypeLits (Symbol) + +-- | Wrap a function argument into this helper to give it a better description for the caller without disturbing the callsite too much. +-- +-- This has instances for IsString and Num, meaning if the caller is usually a string or number literal, it should Just Work. +-- +-- e.g. +-- +-- @ +-- myFoo :: Arg "used as the name in error message" Text -> IO () +-- myFoo (Arg name) = … +-- @ +-- +-- Will display the description in the inferred type of the callsite. 
+-- +-- Due to IsString you can call @myFoo@ like +-- +-- @myFoo "name in error"@ +-- +-- This is mostly intended for literals; if you want to wrap arbitrary data, use @Label@. +newtype Arg (description :: Symbol) a = Arg {unArg :: a} + deriving newtype + ( Show, + Eq, + IsString, + IsList, + Num, + Monoid, + Semigroup + ) diff --git a/users/Profpatsch/my-prelude/src/Postgres/MonadPostgres.hs b/users/Profpatsch/my-prelude/src/Postgres/MonadPostgres.hs index f83a6d7fcf..a542f8c7b8 100644 --- a/users/Profpatsch/my-prelude/src/Postgres/MonadPostgres.hs +++ b/users/Profpatsch/my-prelude/src/Postgres/MonadPostgres.hs @@ -5,13 +5,20 @@ module Postgres.MonadPostgres where +import Arg import AtLeast (AtLeast) import Control.Exception + ( Exception (displayException), + Handler (Handler), + catches, + try, + ) import Control.Foldl qualified as Fold import Control.Monad.Logger.CallStack (MonadLogger, logDebug, logWarn) import Control.Monad.Reader (MonadReader (ask), ReaderT (..)) import Control.Monad.Trans.Resource import Data.Aeson (FromJSON) +import Data.ByteString qualified as ByteString import Data.Error.Tree import Data.HashMap.Strict qualified as HashMap import Data.Int (Int64) @@ -28,8 +35,10 @@ import Database.PostgreSQL.Simple.FromRow qualified as PG import Database.PostgreSQL.Simple.ToField (ToField) import Database.PostgreSQL.Simple.ToRow (ToRow (toRow)) import Database.PostgreSQL.Simple.Types (Query (..)) +import GHC.IO.Handle (Handle) import GHC.Records (getField) import Label +import OpenTelemetry.Trace.Core (NewEvent (newEventName)) import OpenTelemetry.Trace.Core qualified as Otel hiding (inSpan, inSpan') import OpenTelemetry.Trace.Monad qualified as Otel import PossehlAnalyticsPrelude @@ -39,7 +48,9 @@ import Pretty (showPretty) import Seconds import System.Exit (ExitCode (..)) import Tool -import UnliftIO (MonadUnliftIO (withRunInIO)) +import UnliftIO (MonadUnliftIO (withRunInIO), bracket, hClose, mask_) +import UnliftIO.Concurrent (forkIO) +import UnliftIO.Process (ProcessHandle) import UnliftIO.Process qualified as Process import UnliftIO.Resource qualified as Resource import Prelude hiding (init, span) @@ -357,7 +368,7 @@ handlePGException :: ( ToRow params, MonadUnliftIO m, MonadLogger m, - HasField "pgFormat" tools Tool + HasField "pgFormat" tools PgFormatPool ) => tools -> Text -> @@ -405,6 +416,105 @@ withPGTransaction connPool f = connPool (\conn -> Postgres.withTransaction conn (f conn)) +-- | `pg_formatter` is a Perl script that does not support any kind of streaming. +-- Thus we initialize a pool with a bunch of these scripts running, waiting for input. This way we can have somewhat fast SQL formatting. +-- +-- Call `initPgFormatPool` to initialize, then use `runPgFormat` to format some SQL.
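
A brief aside on the Arg helper introduced above, before the pool code continues below: stable Rust has no type-level strings, but the same call-site documentation trick can be approximated with a marker type. A rough analogue; Arg, CutoffPointMs and take_oldest_resource here are illustrative, not from the diff:

use std::marker::PhantomData;

// Marker type playing the role of the type-level description string.
pub struct CutoffPointMs;

// Zero-cost wrapper whose type parameter exists only to document the
// argument at the call site.
pub struct Arg<Desc, T> {
    pub value: T,
    _desc: PhantomData<Desc>,
}

impl<Desc, T> From<T> for Arg<Desc, T> {
    // Mimics the IsString/Num convenience: a plain value converts into Arg.
    fn from(value: T) -> Self {
        Arg { value, _desc: PhantomData }
    }
}

fn take_oldest_resource(cutoff: Arg<CutoffPointMs, u64>) {
    let _ms = cutoff.value;
}

fn main() {
    take_oldest_resource(200.into()); // reads close to the Haskell call site
}
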
+data PgFormatPool = PgFormatPool + { pool :: Pool PgFormatProcess, + pgFormat :: Tool + } + +data PgFormatProcess = PgFormatProcess + { stdinHdl :: Handle, + stdoutHdl :: Handle, + stderrHdl :: Handle, + procHdl :: ProcessHandle, + startedAt :: Otel.Timestamp + } + +initPgFormatPool :: (HasField "pgFormat" tools Tool) => tools -> IO PgFormatPool +initPgFormatPool tools = do + pool <- + Pool.newPool + ( Pool.defaultPoolConfig + (pgFormatStartCommandWaitForInput tools) + ( \pgFmt -> do + Process.terminateProcess pgFmt.procHdl + -- make sure we don’t leave any zombies + _ <- forkIO $ do + _ <- Process.waitForProcess pgFmt.procHdl + pure () + pure () + ) + -- unused resource time + 100 + -- number of resources + 10 + ) + + -- fill the pool with resources + let go = + Pool.tryWithResource pool (\_ -> go) >>= \case + Nothing -> pure () + Just () -> pure () + _ <- go + pure (PgFormatPool {pool, pgFormat = tools.pgFormat}) + +destroyPgFormatPool :: PgFormatPool -> IO () +destroyPgFormatPool pool = Pool.destroyAllResources pool.pool + +-- | Get the oldest resource from the pool, or stop if you find a resource that’s older than `cutoffPointMs`. +takeOldestResource :: PgFormatPool -> Arg "cutoffPointMs" Integer -> IO (PgFormatProcess, Pool.LocalPool PgFormatProcess) +takeOldestResource pool cutoffPointMs = do + now <- Otel.getTimestamp + mask_ $ do + a <- Pool.takeResource pool.pool + (putBack, res) <- go now [] a + -- make sure we don’t leak any resources we didn’t use in the end + for_ putBack $ \(x, xLocal) -> Pool.putResource xLocal x + pure res + where + mkMs ts = (ts & Otel.timestampNanoseconds & toInteger) `div` 1000_000 + go now putBack a@(a', _) = + if abs (mkMs now - mkMs a'.startedAt) > cutoffPointMs.unArg + then pure (putBack, a) + else + Pool.tryTakeResource pool.pool >>= \case + Nothing -> pure (putBack, a) + Just b@(b', _) -> do + if a'.startedAt < b'.startedAt + then go now (b : putBack) a + else go now (a : putBack) b + +-- | Format the given SQL with pg_formatter. Will use the pool of already running formatters to speed up execution. 
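
The selection logic above prefers the longest-running formatter process and stops early once it holds one older than the cutoff; everything else drawn along the way is put back. Before runPgFormat below, here is that loop as a plain in-memory Rust sketch (a toy model, not the semantics of Haskell's Data.Pool):

// Pop candidates until one is older than the cutoff, keeping the oldest
// seen so far and remembering the rest so they can be returned afterwards.
pub struct PgFormatProcess {
    pub started_at_ms: u128,
}

pub fn take_oldest(
    pool: &mut Vec<PgFormatProcess>,
    now_ms: u128,
    cutoff_ms: u128,
) -> Option<PgFormatProcess> {
    let mut best = pool.pop()?;
    let mut put_back = Vec::new();
    loop {
        if now_ms.saturating_sub(best.started_at_ms) > cutoff_ms {
            break; // old enough, stop searching
        }
        match pool.pop() {
            None => break, // pool drained, settle for the oldest seen
            Some(candidate) => {
                if candidate.started_at_ms < best.started_at_ms {
                    put_back.push(std::mem::replace(&mut best, candidate));
                } else {
                    put_back.push(candidate);
                }
            }
        }
    }
    pool.extend(put_back); // return everything we did not pick
    Some(best)
}

As runPgFormat below shows, a process is destroyed after a single use (closing stdin is what makes pg_formatter produce output and exit) and a fresh one is spawned immediately to keep the pool warm.
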
+runPgFormat :: PgFormatPool -> ByteString -> IO (T3 "exitCode" ExitCode "formatted" ByteString "stderr" ByteString) +runPgFormat pool sqlStatement = do + bracket + (takeOldestResource pool 200) + ( \(a, localPool) -> do + -- we always destroy the resource, because the process exited + Pool.destroyResource pool.pool localPool a + -- create a new process to keep the pool “warm” + new <- pgFormatStartCommandWaitForInput pool + Pool.putResource localPool new + ) + ( \(pgFmt, _localPool) -> do + putStderrLn "Running with warm pgformatter" + ByteString.hPut pgFmt.stdinHdl sqlStatement + -- close stdin to make pg_formatter format (it exits …) + -- issue: https://github.com/darold/pgFormatter/issues/333 + hClose pgFmt.stdinHdl + formatted <- ByteString.hGetContents pgFmt.stdoutHdl + errs <- ByteString.hGetContents pgFmt.stderrHdl + exitCode <- Process.waitForProcess pgFmt.procHdl + pure $ + T3 + (label @"exitCode" exitCode) + (label @"formatted" formatted) + (label @"stderr" errs) + ) + runPGTransactionImpl :: (MonadUnliftIO m) => m (Pool Postgres.Connection) -> @@ -418,7 +528,7 @@ runPGTransactionImpl zoom (Transaction transaction) = do unliftIO $ runReaderT transaction conn executeImpl :: - (ToRow params, MonadUnliftIO m, MonadLogger m, HasField "pgFormat" tools Tool, Otel.MonadTracer m) => + (ToRow params, MonadUnliftIO m, MonadLogger m, HasField "pgFormat" tools PgFormatPool, Otel.MonadTracer m) => m tools -> m DebugLogDatabaseQueries -> Query -> @@ -436,7 +546,7 @@ executeImpl zoomTools zoomDebugLogDatabaseQueries qry params = >>= toNumberOfRowsAffected "executeImpl" executeImpl_ :: - (MonadUnliftIO m, MonadLogger m, HasField "pgFormat" tools Tool, Otel.MonadTracer m) => + (MonadUnliftIO m, MonadLogger m, HasField "pgFormat" tools PgFormatPool, Otel.MonadTracer m) => m tools -> m DebugLogDatabaseQueries -> Query -> @@ -453,14 +563,14 @@ executeImpl_ zoomTools zoomDebugLogDatabaseQueries qry = >>= toNumberOfRowsAffected "executeImpl_" executeManyImpl :: - (ToRow params, MonadUnliftIO m, MonadLogger m, HasField "pgFormat" tools Tool, Otel.MonadTracer m) => + (ToRow params, MonadUnliftIO m, MonadLogger m, HasField "pgFormat" tools PgFormatPool, Otel.MonadTracer m) => m tools -> m DebugLogDatabaseQueries -> Query -> NonEmpty params -> Transaction m (Label "numberOfRowsAffected" Natural) executeManyImpl zoomTools zoomDebugLogDatabaseQueries qry params = - Otel.inSpan' "Postgres Query (execute)" Otel.defaultSpanArguments $ \span -> do + Otel.inSpan' "Postgres Query (executeMany)" Otel.defaultSpanArguments $ \span -> do tools <- lift @Transaction zoomTools logDatabaseQueries <- lift @Transaction zoomDebugLogDatabaseQueries traceQueryIfEnabled tools span logDatabaseQueries qry (HasMultiParams params) @@ -480,7 +590,7 @@ toNumberOfRowsAffected functionName i64 = <&> label @"numberOfRowsAffected" executeManyReturningWithImpl :: - (ToRow params, MonadUnliftIO m, MonadLogger m, HasField "pgFormat" tools Tool, Otel.MonadTracer m) => + (ToRow params, MonadUnliftIO m, MonadLogger m, HasField "pgFormat" tools PgFormatPool, Otel.MonadTracer m) => m tools -> m DebugLogDatabaseQueries -> Query -> @@ -489,7 +599,7 @@ executeManyReturningWithImpl :: Transaction m [r] {-# INLINE executeManyReturningWithImpl #-} executeManyReturningWithImpl zoomTools zoomDebugLogDatabaseQueries qry params (Decoder fromRow) = do - Otel.inSpan' "Postgres Query (execute)" Otel.defaultSpanArguments $ \span -> do + Otel.inSpan' "Postgres Query (executeManyReturning)" Otel.defaultSpanArguments $ \span -> do tools <- lift @Transaction 
zoomTools logDatabaseQueries <- lift @Transaction zoomDebugLogDatabaseQueries traceQueryIfEnabled tools span logDatabaseQueries qry (HasMultiParams params) @@ -501,7 +611,7 @@ foldRowsWithAccImpl :: ( ToRow params, MonadUnliftIO m, MonadLogger m, - HasField "pgFormat" tools Tool, + HasField "pgFormat" tools PgFormatPool, Otel.MonadTracer m ) => m tools -> @@ -535,7 +645,7 @@ foldRowsWithAccImpl zoomTools zoomDebugLogDatabaseQueries qry params (Decoder ro ) pgFormatQueryNoParams' :: - (MonadIO m, MonadLogger m, HasField "pgFormat" tools Tool) => + (MonadIO m, MonadLogger m, HasField "pgFormat" tools PgFormatPool) => tools -> Query -> Transaction m Text @@ -571,7 +681,7 @@ queryWithImpl :: ( ToRow params, MonadUnliftIO m, MonadLogger m, - HasField "pgFormat" tools Tool, + HasField "pgFormat" tools PgFormatPool, Otel.MonadTracer m ) => m tools -> @@ -582,7 +692,7 @@ queryWithImpl :: Transaction m [r] {-# INLINE queryWithImpl #-} queryWithImpl zoomTools zoomDebugLogDatabaseQueries qry params (Decoder fromRow) = do - Otel.inSpan' "Postgres Query (execute)" Otel.defaultSpanArguments $ \span -> do + Otel.inSpan' "Postgres Query (queryWith)" Otel.defaultSpanArguments $ \span -> do tools <- lift @Transaction zoomTools logDatabaseQueries <- lift @Transaction zoomDebugLogDatabaseQueries traceQueryIfEnabled tools span logDatabaseQueries qry (HasSingleParam params) @@ -593,7 +703,7 @@ queryWithImpl zoomTools zoomDebugLogDatabaseQueries qry params (Decoder fromRow) queryWithImpl_ :: ( MonadUnliftIO m, MonadLogger m, - HasField "pgFormat" tools Tool + HasField "pgFormat" tools PgFormatPool ) => m tools -> Query -> @@ -619,7 +729,7 @@ pgFormatQuery' :: ( MonadIO m, ToRow params, MonadLogger m, - HasField "pgFormat" tools Tool + HasField "pgFormat" tools PgFormatPool ) => tools -> Query -> @@ -633,7 +743,7 @@ pgFormatQueryMany' :: ( MonadIO m, ToRow params, MonadLogger m, - HasField "pgFormat" tools Tool + HasField "pgFormat" tools PgFormatPool ) => tools -> Query -> @@ -650,33 +760,58 @@ postgresToolsParser = label @"pgFormat" <$> readTool "pg_format" pgFormatQueryByteString :: ( MonadIO m, MonadLogger m, - HasField "pgFormat" tools Tool + HasField "pgFormat" tools PgFormatPool ) => tools -> ByteString -> m Text pgFormatQueryByteString tools queryBytes = do + res <- + liftIO $ + runPgFormat + tools.pgFormat + (queryBytes) + case res.exitCode of + ExitSuccess -> pure (res.formatted & bytesToTextUtf8Lenient) + ExitFailure status -> do + logWarn [fmt|pg_format failed with status {status} while formatting the query, using original query string. Is there a syntax error?|] + logDebug + ( prettyErrorTree + ( nestedMultiError + "pg_format output" + ( nestedError "stdout" (singleError (res.formatted & bytesToTextUtf8Lenient & newError)) + :| [(nestedError "stderr" (singleError (res.stderr & bytesToTextUtf8Lenient & newError)))] + ) + ) + ) + logDebug [fmt|pg_format stdout: stderr|] + pure (queryBytes & bytesToTextUtf8Lenient) + +pgFormatStartCommandWaitForInput :: + ( MonadIO m, + HasField "pgFormat" tools Tool, + MonadFail m + ) => + tools -> + m PgFormatProcess +pgFormatStartCommandWaitForInput tools = do do - (exitCode, stdout, stderr) <- - Process.readProcessWithExitCode - tools.pgFormat.toolPath - ["-"] - (queryBytes & bytesToTextUtf8Lenient & textToString) - case exitCode of - ExitSuccess -> pure (stdout & stringToText) - ExitFailure status -> do - logWarn [fmt|pg_format failed with status {status} while formatting the query, using original query string. 
Is there a syntax error?|] - logDebug - ( prettyErrorTree - ( nestedMultiError - "pg_format output" - ( nestedError "stdout" (singleError (stdout & stringToText & newError)) - :| [(nestedError "stderr" (singleError (stderr & stringToText & newError)))] - ) - ) + startedAt <- Otel.getTimestamp + (Just stdinHdl, Just stdoutHdl, Just stderrHdl, procHdl) <- + Process.createProcess + ( ( Process.proc + tools.pgFormat.toolPath + [ "--no-rcfile", + "-" + ] ) - logDebug [fmt|pg_format stdout: stderr|] - pure (queryBytes & bytesToTextUtf8Lenient) + { Process.std_in = Process.CreatePipe, + Process.std_out = Process.CreatePipe, + Process.std_err = Process.CreatePipe + } + ) + + pure PgFormatProcess {..} data DebugLogDatabaseQueries = -- | Do not log the database queries @@ -697,7 +832,7 @@ traceQueryIfEnabled :: ( ToRow params, MonadUnliftIO m, MonadLogger m, - HasField "pgFormat" tools Tool, + HasField "pgFormat" tools PgFormatPool, Otel.MonadTracer m ) => tools -> @@ -708,20 +843,25 @@ traceQueryIfEnabled tools span logDatabaseQueries qry params = do -- In case we have query logging enabled, we want to do that - let formattedQuery = case params of - HasNoParams -> pgFormatQueryNoParams' tools qry - HasSingleParam p -> pgFormatQuery' tools qry p - HasMultiParams ps -> pgFormatQueryMany' tools qry ps + let formattedQuery = do + withEvent + span + "Query Format start" + "Query Format end" + $ case params of + HasNoParams -> pgFormatQueryNoParams' tools qry + HasSingleParam p -> pgFormatQuery' tools qry p + HasMultiParams ps -> pgFormatQueryMany' tools qry ps + let doLog errs = Otel.addAttributes span $ HashMap.fromList $ ( ("_.postgres.query", Otel.toAttribute @Text errs.query) : ( errs.explain - & foldMap - ( \ex -> - [("_.postgres.explain", Otel.toAttribute @Text ex)] - ) + & \case + Nothing -> [] + Just ex -> [("_.postgres.explain", Otel.toAttribute @Text ex)] ) ) let doExplain = do @@ -750,6 +890,37 @@ traceQueryIfEnabled tools span logDatabaseQueries qry params = do ex <- doExplain doLog (T2 (label @"query" q) (label @"explain" (Just ex))) +-- | Add a start and end event to the span, and record how long the action between them took. +-- +-- This is more lightweight than starting an extra span for timing things.
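
The withEvent helper (its Haskell implementation follows below) brackets an action with two span events instead of opening a nested span, and attaches the elapsed milliseconds to the closing event. The same shape as a neutral Rust sketch, with add_event standing in for whatever the tracing backend offers:

use std::future::Future;
use std::time::Instant;

// Bracket an async action with a start and an end event; the end event
// carries how long the action took, in milliseconds.
async fn with_event<F, T>(
    mut add_event: impl FnMut(&str, Option<u128>),
    start: &str,
    end: &str,
    act: F,
) -> T
where
    F: Future<Output = T>,
{
    let t0 = Instant::now();
    add_event(start, None);
    let res = act.await;
    add_event(end, Some(t0.elapsed().as_millis()));
    res
}
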
+withEvent :: (MonadIO f) => Otel.Span -> Text -> Text -> f b -> f b +withEvent span start end act = do + let mkMs ts = (ts & Otel.timestampNanoseconds & toInteger) `div` 1000_000 + s <- Otel.getTimestamp + Otel.addEvent + span + ( Otel.NewEvent + { newEventName = start, + newEventAttributes = mempty, + newEventTimestamp = Just s + } + ) + res <- act + e <- Otel.getTimestamp + let tookMs = + (mkMs e - mkMs s) + -- should be small enough + & fromInteger @Int + Otel.addEvent + span + ( Otel.NewEvent + { newEventName = end, + newEventAttributes = HashMap.fromList [("took ms", Otel.toAttribute tookMs)], + newEventTimestamp = Just e + } + ) + pure res + instance (ToField t1) => ToRow (Label l1 t1) where toRow t2 = toRow $ PG.Only $ getField @l1 t2 diff --git a/users/Profpatsch/whatcd-resolver/src/AppT.hs b/users/Profpatsch/whatcd-resolver/src/AppT.hs index 7afd430745..abe8ccad4c 100644 --- a/users/Profpatsch/whatcd-resolver/src/AppT.hs +++ b/users/Profpatsch/whatcd-resolver/src/AppT.hs @@ -19,14 +19,13 @@ import OpenTelemetry.Trace.Monad qualified as Otel import PossehlAnalyticsPrelude import Postgres.MonadPostgres import System.IO qualified as IO -import Tool (Tool) import UnliftIO import Prelude hiding (span) data Context = Context { config :: Label "logDatabaseQueries" DebugLogDatabaseQueries, tracer :: Otel.Tracer, - pgFormat :: Tool, + pgFormat :: PgFormatPool, pgConnPool :: Pool Postgres.Connection, transmissionSessionId :: MVar ByteString } diff --git a/users/Profpatsch/whatcd-resolver/src/Redacted.hs b/users/Profpatsch/whatcd-resolver/src/Redacted.hs index 4369c18408..c0c26b72d6 100644 --- a/users/Profpatsch/whatcd-resolver/src/Redacted.hs +++ b/users/Profpatsch/whatcd-resolver/src/Redacted.hs @@ -382,8 +382,8 @@ getTorrentById dat = do >>= ensureSingleRow -- | Find the best torrent for each torrent group (based on the seeding_weight) -getBestTorrents :: (MonadPostgres m) => Transaction m [TorrentData ()] -getBestTorrents = do +getBestTorrents :: (MonadPostgres m, HasField "onlyDownloaded" opts Bool) => opts -> Transaction m [TorrentData ()] +getBestTorrents opts = do queryWith [sql| SELECT * FROM ( @@ -393,15 +393,18 @@ getBestTorrents = do seeding_weight, t.full_json_result AS torrent_json, tg.full_json_result AS torrent_group_json, - t.torrent_file IS NOT NULL, + t.torrent_file IS NOT NULL as has_torrent_file, t.transmission_torrent_hash FROM redacted.torrents t JOIN redacted.torrent_groups tg ON tg.id = t.torrent_group ORDER BY group_id, seeding_weight DESC ) as _ + WHERE + -- onlyDownloaded + ((NOT ?::bool) OR has_torrent_file) ORDER BY seeding_weight DESC |] - () + (Only opts.onlyDownloaded :: Only Bool) ( do groupId <- Dec.fromField @Int torrentId <- Dec.fromField @Int diff --git a/users/Profpatsch/whatcd-resolver/src/WhatcdResolver.hs b/users/Profpatsch/whatcd-resolver/src/WhatcdResolver.hs index f1902bac8c..1ec23e1fc7 100644 --- a/users/Profpatsch/whatcd-resolver/src/WhatcdResolver.hs +++ b/users/Profpatsch/whatcd-resolver/src/WhatcdResolver.hs @@ -36,7 +36,6 @@ import Network.HTTP.Types import Network.HTTP.Types qualified as Http import Network.URI (URI) import Network.URI qualified -import Network.URI qualified as URI import Network.Wai (ResponseReceived) import Network.Wai qualified as Wai import Network.Wai.Handler.Warp qualified as Warp @@ -55,7 +54,6 @@ import System.Directory qualified as Xdg import System.Environment qualified as Env import System.FilePath ((</>)) import Text.Blaze.Html (Html) -import Text.Blaze.Html.Renderer.Pretty qualified as Html.Pretty import 
Text.Blaze.Html.Renderer.Utf8 qualified as Html import Text.Blaze.Html5 qualified as Html import Tool (readTool, readTools) @@ -77,7 +75,6 @@ main = htmlUi :: AppT IO () htmlUi = do - let debug = True uniqueRunId <- runTransaction $ querySingleRowWith @@ -87,13 +84,13 @@ htmlUi = do () (Dec.fromField @Text) - withRunInIO $ \runInIO -> Warp.run 9093 $ \req respond -> do + withRunInIO $ \runInIO -> Warp.run 9093 $ \req respondOrig -> do let catchAppException act = try act >>= \case Right a -> pure a Left (AppException err) -> do runInIO (logError err) - respond (Wai.responseLBS Http.status500 [] "") + respondOrig (Wai.responseLBS Http.status500 [] "") catchAppException $ do let mp span parser = @@ -119,9 +116,9 @@ htmlUi = do let handlers :: Handlers (AppT IO) handlers respond = Map.fromList - [ ("", respond.h (mainHtml uniqueRunId)), + [ ("", respond.html (mainHtml uniqueRunId)), ( "snips/redacted/search", - respond.h $ + respond.html $ \span -> do dat <- mp @@ -132,12 +129,12 @@ htmlUi = do snipsRedactedSearch dat ), ( "snips/redacted/torrentDataJson", - respond.h $ \span -> do + respond.html $ \span -> do dat <- torrentIdMp span Html.mkVal <$> (runTransaction $ getTorrentById dat) ), ( "snips/redacted/getTorrentFile", - respond.h $ \span -> do + respond.html $ \span -> do dat <- torrentIdMp span runTransaction $ do inserted <- redactedGetTorrentFileAndInsert dat @@ -157,7 +154,7 @@ htmlUi = do ), -- TODO: this is bad duplication?? ( "snips/redacted/startTorrentFile", - respond.h $ \span -> do + respond.html $ \span -> do dat <- torrentIdMp span runTransaction $ do file <- @@ -180,7 +177,7 @@ htmlUi = do "Starting" ), ( "snips/transmission/getTorrentState", - respond.h $ \span -> do + respond.html $ \span -> do dat <- mp span $ label @"torrentHash" <$> Multipart.field "torrent-hash" Field.utf8 status <- doTransmissionRequest' @@ -199,7 +196,7 @@ htmlUi = do Just _torrent -> [hsx|Running|] ), ( "snips/jsonld/render", - respond.h $ \span -> do + respond.html $ \span -> do qry <- parseQueryArgs span @@ -211,6 +208,16 @@ htmlUi = do jsonld <- httpGetJsonLd (qry.target) pure $ renderJsonld jsonld ), + ( "artist", + respond.html $ \span -> do + qry <- + parseQueryArgs + span + ( label @"dbId" + <$> (singleQueryArgument "db_id" Field.utf8) + ) + artistPage qry + ), ( "autorefresh", respond.plain $ do qry <- @@ -233,23 +240,22 @@ htmlUi = do ] runInIO $ runHandlers - debug - (\respond -> respond.h $ (mainHtml uniqueRunId)) + (\respond -> respond.html $ (mainHtml uniqueRunId)) handlers req - respond + respondOrig where everySecond :: Text -> Enc -> Html -> Html everySecond call extraData innerHtml = [hsx|<div hx-trigger="every 1s" hx-swap="outerHTML" hx-post={call} hx-vals={Enc.encToBytesUtf8 extraData}>{innerHtml}</div>|] mainHtml :: Text -> Otel.Span -> AppT IO Html mainHtml uniqueRunId _span = runTransaction $ do - jsonld <- - httpGetJsonLd - ( URI.parseURI "https://musicbrainz.org/work/92000fd4-d304-406d-aeb4-6bdbeed318ec" & annotate "not an URI" & unwrapError, - "https://musicbrainz.org/work/92000fd4-d304-406d-aeb4-6bdbeed318ec" - ) - <&> renderJsonld + -- jsonld <- + -- httpGetJsonLd + -- ( URI.parseURI "https://musicbrainz.org/work/92000fd4-d304-406d-aeb4-6bdbeed318ec" & annotate "not an URI" & unwrapError, + -- "https://musicbrainz.org/work/92000fd4-d304-406d-aeb4-6bdbeed318ec" + -- ) + -- <&> renderJsonld bestTorrentsTable <- getBestTorrentsTable -- transmissionTorrentsTable <- lift @Transaction getTransmissionTorrentsTable pure $ @@ -271,7 +277,6 @@ htmlUi = do </style> </head> <body> - 
{jsonld} <form hx-post="/snips/redacted/search" hx-target="#redacted-search-results"> @@ -300,44 +305,49 @@ htmlUi = do </body> |] +artistPage :: (HasField "dbId" dat Text, Applicative m) => dat -> m Html +artistPage dat = do + pure + [hsx| + Artist ID: {dat.dbId} + |] + type Handlers m = HandlerResponses m -> Map Text (m ResponseReceived) -type HandlerResponses m = T2 "h" ((Otel.Span -> m Html) -> m ResponseReceived) "plain" (m Wai.Response -> m ResponseReceived) +data HandlerResponses m = HandlerResponses + { -- | render html + html :: ((Otel.Span -> m Html) -> m ResponseReceived), + -- | render a plain wai response + plain :: (m Wai.Response -> m ResponseReceived) + } runHandlers :: (MonadOtel m) => - Bool -> (HandlerResponses m -> m ResponseReceived) -> (HandlerResponses m -> Map Text (m ResponseReceived)) -> Wai.Request -> (Wai.Response -> IO ResponseReceived) -> m ResponseReceived -runHandlers debug defaultHandler handlers req respond = withRunInIO $ \runInIO -> do - let renderHtml = - if debug - then Html.Pretty.renderHtml >>> stringToText >>> textToBytesUtf8 >>> toLazyBytes - else Html.renderHtml - let hh route act = - Otel.inSpan' - [fmt|Route {route }|] - ( Otel.defaultSpanArguments - { Otel.attributes = - HashMap.fromList - [ ("server.path", Otel.toAttribute @Text route) - ] - } - ) - ( \span -> do - res <- act span - liftIO $ respond . Wai.responseLBS Http.ok200 ([("Content-Type", "text/html")] <> res.extraHeaders) . renderHtml $ res.html - ) - let h route act = hh route (\span -> act span <&> (\html -> T2 (label @"html" html) (label @"extraHeaders" []))) - - let path = (req & Wai.pathInfo & Text.intercalate "/") +runHandlers defaultHandler handlers req respond = withRunInIO $ \runInIO -> do + let path = req & Wai.pathInfo & Text.intercalate "/" let handlerResponses = - ( T2 - (label @"h" (h path)) - (label @"plain" (\m -> liftIO $ runInIO m >>= respond)) + ( HandlerResponses + { plain = (\m -> liftIO $ runInIO m >>= respond), + html = \act -> + Otel.inSpan' + [fmt|Route /{path}|] + ( Otel.defaultSpanArguments + { Otel.attributes = + HashMap.fromList + [ ("server.path", Otel.toAttribute @Text path) + ] + } + ) + ( \span -> do + res <- act span <&> (\html -> T2 (label @"html" html) (label @"extraHeaders" [])) + liftIO $ respond . Wai.responseLBS Http.ok200 ([("Content-Type", "text/html")] <> res.extraHeaders) . 
Html.renderHtml $ res.html + ) + } ) let handler = (handlers handlerResponses) @@ -428,7 +438,7 @@ getBestTorrentsTable :: ) => Transaction m Html getBestTorrentsTable = do - bestStale :: [TorrentData ()] <- getBestTorrents + bestStale :: [TorrentData ()] <- getBestTorrents (label @"onlyDownloaded" False) actual <- getAndUpdateTransmissionTorrentsStatus ( bestStale @@ -462,11 +472,16 @@ getBestTorrentsTable = do fresh & foldMap ( \b -> do + let artistLink :: Text = [fmt|/artist?db_id={b.groupId}|] [hsx| <tr> <td>{localTorrent b}</td> <td>{Html.toHtml @Int b.groupId}</td> - <td>{Html.toHtml @Text b.torrentGroupJson.artist}</td> + <td> + <a href={artistLink}> + {Html.toHtml @Text b.torrentGroupJson.artist} + </a> + </td> <td>{Html.toHtml @Text b.torrentGroupJson.groupName}</td> <td>{Html.toHtml @Int b.seedingWeight}</td> <td><details hx-trigger="toggle once" hx-post="snips/redacted/torrentDataJson" hx-vals={Enc.encToBytesUtf8 $ Enc.object [("torrent-id", Enc.int b.torrentId)]}></details></td> @@ -624,7 +639,8 @@ httpTorrent span req = runAppWith :: AppT IO a -> IO (Either TmpPg.StartError a) runAppWith appT = withTracer $ \tracer -> withDb $ \db -> do - pgFormat <- readTools (label @"toolsEnvVar" "WHATCD_RESOLVER_TOOLS") (readTool "pg_format") + tool <- readTools (label @"toolsEnvVar" "WHATCD_RESOLVER_TOOLS") (readTool "pg_format") + pgFormat <- initPgFormatPool (label @"pgFormat" tool) let config = label @"logDatabaseQueries" LogDatabaseQueries pgConnPool <- Pool.newPool $ diff --git a/users/Profpatsch/whatcd-resolver/whatcd-resolver.cabal b/users/Profpatsch/whatcd-resolver/whatcd-resolver.cabal index a9bd04827b..8b3258bb5f 100644 --- a/users/Profpatsch/whatcd-resolver/whatcd-resolver.cabal +++ b/users/Profpatsch/whatcd-resolver/whatcd-resolver.cabal @@ -119,3 +119,7 @@ executable whatcd-resolver build-depends: base >=4.15 && <5, whatcd-resolver + + ghc-options: + -threaded + diff --git a/users/amjoseph/OWNERS b/users/amjoseph/OWNERS new file mode 100644 index 0000000000..a99992be60 --- /dev/null +++ b/users/amjoseph/OWNERS @@ -0,0 +1,3 @@ +set noparent + +amjoseph diff --git a/users/amjoseph/keys.nix b/users/amjoseph/keys.nix new file mode 100644 index 0000000000..8cc2f24369 --- /dev/null +++ b/users/amjoseph/keys.nix @@ -0,0 +1,22 @@ +{ ... }: + +let + # Long-term, air-gapped PGP key. This key is used only for signing other + # keys. It is a minor hassle for me to access this key. + airgap = "F0B74D717CDE8412A3E0D4D5F29AC8080DA8E1E0"; + + # Stored in an HSM. Signed by the above key. + current = "D930411B675A011EB9590713DC4AB809B13BE76D"; + + # Chat protocols that depend on DNS, WebPKI, or E.164 are lame. This is not. + ricochet = "emhxygy5mezcovm5a6q5hze5eqfqgieww56eh4ttwmrolwqmzgb6qiyd"; + + # This ssh key is for depot. Please don't use it elsewhere, except to give + # me the ability to set a system-specific key elsewhere. Not currently + # stored in an HSM, but I'm working on that. 
+ ssh-for-depot = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOE5e0HrwQTI5KOaU12J0AJG5zDpWn4g/U+oFXz7SkbD"; + +in +{ + all = [ ssh-for-depot ]; +} diff --git a/users/aspen/system/home/modules/games.nix b/users/aspen/system/home/modules/games.nix index b7653bb058..dc6331d648 100644 --- a/users/aspen/system/home/modules/games.nix +++ b/users/aspen/system/home/modules/games.nix @@ -15,14 +15,14 @@ let }); init = runCommand "init.txt" { } '' - substitute "${df-orig}/data/init/init.txt" $out \ + substitute "${df-orig}/data/init/init_default.txt" $out \ --replace "[INTRO:YES]" "[INTRO:NO]" \ --replace "[VOLUME:255]" "[VOLUME:0]" \ --replace "[FPS:NO]" "[FPS:YES]" ''; d_init = runCommand "d_init.txt" { } '' - substitute "${df-orig}/data/init/d_init.txt" $out \ + substitute "${df-orig}/data/init/d_init_default.txt" $out \ --replace "[AUTOSAVE:NONE]" "[AUTOSAVE:SEASONAL]" \ --replace "[AUTOSAVE_PAUSE:NO]" "[AUTOSAVE_PAUSE:YES]" \ --replace "[INITIAL_SAVE:NO]" "[INITIAL_SAVE:YES]" \ diff --git a/users/flokli/keyboards/dilemma/default.nix b/users/flokli/keyboards/dilemma/default.nix index 265f8e56db..cd05b288e8 100644 --- a/users/flokli/keyboards/dilemma/default.nix +++ b/users/flokli/keyboards/dilemma/default.nix @@ -1,16 +1,18 @@ { depot, pkgs, ... }: rec { + qmk_firmware_src = pkgs.fetchFromGitHub { + owner = "qmk"; + repo = "qmk_firmware"; + rev = "0.24.8"; + hash = "sha256-DRHPfJXF1KF1+EwkbeGhqhVrpfp21JY2spOZxesZFbA="; + fetchSubmodules = true; + }; + firmware = pkgs.stdenv.mkDerivation { name = "keychron-bastardkb-dilemma-firmware"; - src = pkgs.fetchFromGitHub { - owner = "qmk"; - repo = "qmk_firmware"; - rev = "728aa576b0cd65c6fb7cf77132fdcd06fcedb643"; # develop branch - hash = "sha256-YmdX8nEsB1R8d265HAmvwejPjEHJdoTnm4QNigzrcyw="; - fetchSubmodules = true; - }; + src = qmk_firmware_src; patches = [ ./enable-taps.patch ]; @@ -38,7 +40,7 @@ rec { }; flash = pkgs.writeShellScript "flash.sh" '' - ${pkgs.qmk}/bin/qmk flash ${firmware}/bastardkb_dilemma_3x5_3_flokli.uf2 + QMK_HOME=${qmk_firmware_src} ${pkgs.qmk}/bin/qmk flash ${firmware}/bastardkb_dilemma_3x5_3_flokli.uf2 ''; meta.ci.targets = [ "firmware" ]; diff --git a/users/flokli/keyboards/k6_pro/default.nix b/users/flokli/keyboards/k6_pro/default.nix index 708bec7313..49945b88ae 100644 --- a/users/flokli/keyboards/k6_pro/default.nix +++ b/users/flokli/keyboards/k6_pro/default.nix @@ -1,16 +1,18 @@ { depot, pkgs, ... 
}: rec { + qmk_firmware_src = pkgs.fetchFromGitHub { + owner = "Keychron"; # the Keychron fork of qmk/qmk_firmware + repo = "qmk_firmware"; + rev = "e0a48783e7cde92d1edfc53a8fff511c45e869d4"; # bluetooth_playground branch + hash = "sha256-Pk9kXktmej9JyvSt7UMEW2FDrBg7k1lOssh6HjrP5ro="; + fetchSubmodules = true; + }; + firmware = pkgs.stdenv.mkDerivation { name = "keychron-k6_pro-firmware"; - src = pkgs.fetchFromGitHub { - owner = "Keychron"; # the Keychron fork of qmk/qmk_firmware - repo = "qmk_firmware"; - rev = "e0a48783e7cde92d1edfc53a8fff511c45e869d4"; # bluetooth_playground branch - hash = "sha256-Pk9kXktmej9JyvSt7UMEW2FDrBg7k1lOssh6HjrP5ro="; - fetchSubmodules = true; - }; + src = qmk_firmware_src; nativeBuildInputs = [ pkgs.qmk @@ -32,7 +34,7 @@ rec { }; flash = pkgs.writeShellScript "flash.sh" '' - ${pkgs.qmk}/bin/qmk flash ${firmware}/keychron_k6_pro_ansi_rgb_flokli.bin + QMK_HOME=${qmk_firmware_src} ${pkgs.qmk}/bin/qmk flash ${firmware}/keychron_k6_pro_ansi_rgb_flokli.bin ''; meta.ci.targets = [ "firmware" ]; diff --git a/users/picnoir/tvix-daemon/src/main.rs b/users/picnoir/tvix-daemon/src/main.rs index 102067fcf7..dc49b209e0 100644 --- a/users/picnoir/tvix-daemon/src/main.rs +++ b/users/picnoir/tvix-daemon/src/main.rs @@ -4,7 +4,7 @@ use tokio_listener::{self, SystemOptions, UserOptions}; use tracing::{debug, error, info, instrument, Level}; use nix_compat::worker_protocol::{self, server_handshake_client, ClientSettings, Trust}; -use nix_compat::{wire, ProtocolVersion}; +use nix_compat::ProtocolVersion; #[derive(Parser, Debug)] struct Cli { @@ -78,7 +78,9 @@ where // TODO: implement logging. For now, we'll just send // STDERR_LAST, which is good enough to get Nix respond to // us. - wire::write_u64(&mut client_connection.conn, worker_protocol::STDERR_LAST) + client_connection + .conn + .write_u64_le(worker_protocol::STDERR_LAST) .await .unwrap(); loop { @@ -109,6 +111,6 @@ where let settings = worker_protocol::read_client_settings(&mut conn.conn, conn.version).await?; // The client expects us to send some logs when we're processing // the settings. Sending STDERR_LAST signal we're done processing. - wire::write_u64(&mut conn.conn, worker_protocol::STDERR_LAST).await?; + conn.conn.write_u64_le(worker_protocol::STDERR_LAST).await?; Ok(settings) } diff --git a/users/tazjin/nixos/koptevo/default.nix b/users/tazjin/nixos/koptevo/default.nix index 39a4887c72..ea8dfd4bd8 100644 --- a/users/tazjin/nixos/koptevo/default.nix +++ b/users/tazjin/nixos/koptevo/default.nix @@ -129,10 +129,11 @@ in ''; }; - # I don't use the podcast feature, but I *have to* supply podcasts - # to gonic ... + # I don't use the podcast nor playlist feature, + # but I *have to* supply podcasts to gonic ... systemd.tmpfiles.rules = [ "d /tmp/fake-podcasts 0555 nobody nobody -" + "d /tmp/fake-playlists 0555 nobody nobody -" ]; services.gonic = { @@ -142,6 +143,7 @@ in scan-interval = 5; scan-at-start-enabled = true; podcast-path = [ "/tmp/fake-podcasts" ]; + playlists-path = [ "/tmp/fake-playlists" ]; music-path = [ "/var/lib/geesefs/tazjins-files/music" ]; }; }; diff --git a/users/tazjin/nixos/modules/physical.nix b/users/tazjin/nixos/modules/physical.nix index bb85c6fb98..d469da7e5a 100644 --- a/users/tazjin/nixos/modules/physical.nix +++ b/users/tazjin/nixos/modules/physical.nix @@ -24,6 +24,7 @@ in users.tazjin.chase-geese config.tazjin.emacs third_party.agenix.cli + tools.when ]) ++ # programs from nixpkgs |