From b8552c65d593422ffcc0ddc2dc3e1d8080a4cad1 Mon Sep 17 00:00:00 2001 From: Eliza Weisman Date: Sun, 11 Aug 2024 12:00:47 -0700 Subject: [PATCH 1/7] ci: add a basic illumos test job This commit adds a simple test job for illumos using Buildomat. I'd like to make some more improvements and add docs before merging this, but I want to see if it even works first. --- .github/buildomat/jobs/test-full.sh | 44 +++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 .github/buildomat/jobs/test-full.sh diff --git a/.github/buildomat/jobs/test-full.sh b/.github/buildomat/jobs/test-full.sh new file mode 100644 index 00000000000..40f9920c631 --- /dev/null +++ b/.github/buildomat/jobs/test-full.sh @@ -0,0 +1,44 @@ +#!/bin/bash +#: +#: name = "illumos-test-full" +#: variety = "basic" +#: target = "omnios-r151038" +#: rust_toolchain = "stable" + +# basic illumos test job with --features full. +# TODO(eliza): consider splitting the "build" and "test" jobs into separate +# buildomat jobs, so that the build and test jobs can fail independently. + +set -o errexit +set -o pipefail +set -o xtrace + + +# These are the same env vars that are set for all GitHub Actions CI jobs. +export RUSTFLAGS="-Dwarnings" +export RUST_BACKTRACE=1 +# We're building once, so there's no need to incur the overhead of an +# incremental build. +export CARGO_INCREMENTAL=0 + +# NOTE: Currently we use the latest cargo-nextest release, since this is what +# the Linux CI jobs do. If we ever start pinning our nextest version, this +# should be changed to match that. +NEXTEST_VERSION='latest' + + +curl -sSfL --retry 10 "https://get.nexte.st/$NEXTEST_VERSION/illumos" | gunzip | tar -xvf - -C ~/.cargo/bin + +# Print the current test execution environment +uname -a +cargo --version +rustc --version + +banner build +ptime -m cargo test --no-run --all --verbose --features full + +banner tests +ptime -m cargo nextest run --features full + +banner doctests +ptime -m cargo test --doc --verbose --features full From 38cc55b33887234e720898efdd259ba1e53b223e Mon Sep 17 00:00:00 2001 From: Eliza Weisman Date: Mon, 12 Aug 2024 11:34:06 -0700 Subject: [PATCH 2/7] change `anon_pipe_spawn_echo` to use `printf(1)` The illumos version of `echo(1)` doesn't support `-n`, so it interprets it literally as part of the string to echo, making the test fail. I've changed it to use `printf(1)` with no flags args to get the same behavior. I also changed the name of the test, since it doesn't actually spawn `echo(1)` anymore :) --- tokio/tests/net_unix_pipe.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tokio/tests/net_unix_pipe.rs b/tokio/tests/net_unix_pipe.rs index 37b8b41bd31..a28303e97c3 100644 --- a/tokio/tests/net_unix_pipe.rs +++ b/tokio/tests/net_unix_pipe.rs @@ -457,15 +457,14 @@ async fn anon_pipe_simple_send() -> io::Result<()> { } #[tokio::test] -async fn anon_pipe_spawn_echo() -> std::io::Result<()> { +async fn anon_pipe_spawn_printf() -> std::io::Result<()> { use tokio::process::Command; const DATA: &str = "this is some data to write to the pipe"; let (tx, mut rx) = pipe::pipe()?; - let status = Command::new("echo") - .arg("-n") + let status = Command::new("printf") .arg(DATA) .stdout(tx.into_blocking_fd()?) .status(); From 1064b29f9485471c9ce5c07d1163ac90d62599b6 Mon Sep 17 00:00:00 2001 From: Eliza Weisman Date: Mon, 12 Aug 2024 13:22:03 -0700 Subject: [PATCH 3/7] fix `uds_stream::epollhup` test --- tokio/tests/uds_stream.rs | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tokio/tests/uds_stream.rs b/tokio/tests/uds_stream.rs index b8c4e6a8eed..51403718528 100644 --- a/tokio/tests/uds_stream.rs +++ b/tokio/tests/uds_stream.rs @@ -406,6 +406,16 @@ async fn epollhup() -> io::Result<()> { drop(listener); let err = connect.await.unwrap_err(); - assert_eq!(err.kind(), io::ErrorKind::ConnectionReset); + + // On illumos (and presumably other Solaris descendents), `connect(3SOCKET)` + // returns `ECONNREFUSED` here rather than `ECONNRESET`. + // + // See: https://illumos.org/man/3SOCKET/connect + let expected_errno = if cfg!(target_os = "illumos") || cfg!(target_os = "solaris") { + io::ErrorKind::ConnectionRefused + } else { + io::ErrorKind::ConnectionReset + }; + assert_eq!(err.kind(), expected_errno); Ok(()) } From ed03ae90a97f832a7571d14ce3cc6354a12d213f Mon Sep 17 00:00:00 2001 From: Eliza Weisman Date: Mon, 12 Aug 2024 13:23:23 -0700 Subject: [PATCH 4/7] ci: disable nextest failfast --- .github/buildomat/jobs/test-full.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/buildomat/jobs/test-full.sh b/.github/buildomat/jobs/test-full.sh index 40f9920c631..e335aeeb528 100644 --- a/.github/buildomat/jobs/test-full.sh +++ b/.github/buildomat/jobs/test-full.sh @@ -38,7 +38,7 @@ banner build ptime -m cargo test --no-run --all --verbose --features full banner tests -ptime -m cargo nextest run --features full +ptime -m cargo nextest run --features full --no-fail-fast banner doctests ptime -m cargo test --doc --verbose --features full From 35c1465555c72e8a5cf0e651e288e5db0713797f Mon Sep 17 00:00:00 2001 From: Eliza Weisman Date: Wed, 14 Aug 2024 11:34:29 -0700 Subject: [PATCH 5/7] make errno assertion less flaky --- tokio/tests/uds_stream.rs | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/tokio/tests/uds_stream.rs b/tokio/tests/uds_stream.rs index 51403718528..cb40cbd1832 100644 --- a/tokio/tests/uds_stream.rs +++ b/tokio/tests/uds_stream.rs @@ -406,16 +406,17 @@ async fn epollhup() -> io::Result<()> { drop(listener); let err = connect.await.unwrap_err(); - - // On illumos (and presumably other Solaris descendents), `connect(3SOCKET)` - // returns `ECONNREFUSED` here rather than `ECONNRESET`. - // - // See: https://illumos.org/man/3SOCKET/connect - let expected_errno = if cfg!(target_os = "illumos") || cfg!(target_os = "solaris") { - io::ErrorKind::ConnectionRefused - } else { - io::ErrorKind::ConnectionReset - }; - assert_eq!(err.kind(), expected_errno); + let errno = err.kind(); + assert!( + // As far as I can tell, whether we see ECONNREFUSED or ECONNRESET here + // seems relatively inconsistent, at least on non-Linux operating + // systems. The difference in meaning between these errnos is not + // particularly well-defined, so let's just accept either. + matches!( + errno, + io::ErrorKind::ConnectionRefused | io::ErrorKind::ConnectionReset + ), + "unexpected error kind: {errno:?} (expected ConnectionRefused or ConnectionReset)" + ); Ok(()) } From f18d6ed7d4e0724bbe14db5519d7c80b3227a1a9 Mon Sep 17 00:00:00 2001 From: Eliza Weisman Date: Wed, 14 Aug 2024 11:34:55 -0700 Subject: [PATCH 6/7] make io_async_fd tests always drain written amount --- tokio/tests/io_async_fd.rs | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/tokio/tests/io_async_fd.rs b/tokio/tests/io_async_fd.rs index 3ab1cebd884..a9f22a0e3d7 100644 --- a/tokio/tests/io_async_fd.rs +++ b/tokio/tests/io_async_fd.rs @@ -135,15 +135,17 @@ fn socketpair() -> (FileDescriptor, FileDescriptor) { fds } -fn drain(mut fd: &FileDescriptor) { +fn drain(mut fd: &FileDescriptor, mut amt: usize) { let mut buf = [0u8; 512]; - #[allow(clippy::unused_io_amount)] - loop { + while amt > 0 { match fd.read(&mut buf[..]) { - Err(e) if e.kind() == ErrorKind::WouldBlock => break, + Err(e) if e.kind() == ErrorKind::WouldBlock => {} Ok(0) => panic!("unexpected EOF"), Err(e) => panic!("unexpected error: {:?}", e), - Ok(_) => continue, + Ok(x) => { + amt -= x; + continue; + } } } } @@ -219,10 +221,10 @@ async fn reset_writable() { let mut guard = afd_a.writable().await.unwrap(); // Write until we get a WouldBlock. This also clears the ready state. - while guard - .try_io(|_| afd_a.get_ref().write(&[0; 512][..])) - .is_ok() - {} + let mut bytes = 0; + while let Ok(Ok(amt)) = guard.try_io(|_| afd_a.get_ref().write(&[0; 512][..])) { + bytes += amt; + } // Writable state should be cleared now. let writable = afd_a.writable(); @@ -234,7 +236,7 @@ async fn reset_writable() { } // Read from the other side; we should become writable now. - drain(&b); + drain(&b, bytes); let _ = writable.await.unwrap(); } @@ -386,7 +388,10 @@ async fn poll_fns() { let afd_b = Arc::new(AsyncFd::new(b).unwrap()); // Fill up the write side of A - while afd_a.get_ref().write(&[0; 512]).is_ok() {} + let mut bytes = 0; + while let Ok(amt) = afd_a.get_ref().write(&[0; 512]) { + bytes += amt; + } let waker = TestWaker::new(); @@ -446,7 +451,7 @@ async fn poll_fns() { } // Make it writable now - drain(afd_b.get_ref()); + drain(afd_b.get_ref(), bytes); // now we should be writable (ie - the waker for poll_write should still be registered after we wake the read side) let _ = write_fut.await; From 347b921a3df3ad7738176e67424c61948a5e2996 Mon Sep 17 00:00:00 2001 From: Eliza Weisman Date: Wed, 14 Aug 2024 12:54:28 -0700 Subject: [PATCH 7/7] task: illumos/Solaris have thread-local weirdness ## Motivation In `tokio::task::local`, there's a `LocalState::assert_called_from_owner_thread` function, which checks that the caller's thread ID matches that of the thread on which the `LocalSet` was created. This assertion is disabled on FreeBSD and OpenBSD, with a comment indicating that this is because those systems have "some weirdness around thread-local destruction". If memory serves, I believe the "weirdness" was related to the order in which thread-locals are destroyed relative to each other, or something along those lines. Upon running the tests on illumos, there appears to be [a similar panic][1] in the `task_local_set_in_thread_local` due to this assertion assertion while dropping a `LocalSet` which lives in a thread-local. This leads me to believe that illumos, and presumably Solaris, also exhibit the same "weirdness". This wouldn't be too surprising, given the shared heritage of those systems. ## Solution This commit adds `target_os="illumos"` and `target_os="solaris"` to the `cfg` attribute that disables the assertion on systems determined to have weirdness. This fixes the test panicking on illumos. In the future, it might be worthwhile to look into changign the assertion to only be disabled on weirdness-having systems _while dropping the `LocalSet`_, rather than all the time. That way, we can still check other accesses. But, I wanted to make the minimum change necessary to fix it here before messing around with that. [1]: https://buildomat.eng.oxide.computer/wg/0/details/01J592RB0JR203RGGN0RYHJHMY/CHieo1Uee7qzRVyp811YBl0MvXGO3i0QA9ezPaFWj6hf6P3P/01J592RSWCNX1MCFYGW74AHVH6#S1954 --- tokio/src/task/local.rs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tokio/src/task/local.rs b/tokio/src/task/local.rs index 08d89c49c03..baab6805b6c 100644 --- a/tokio/src/task/local.rs +++ b/tokio/src/task/local.rs @@ -1160,9 +1160,14 @@ impl LocalState { #[track_caller] fn assert_called_from_owner_thread(&self) { - // FreeBSD has some weirdness around thread-local destruction. + // BSDs and Solarises has some weirdness around thread-local destruction. // TODO: remove this hack when thread id is cleaned up - #[cfg(not(any(target_os = "openbsd", target_os = "freebsd")))] + #[cfg(not(any( + target_os = "openbsd", + target_os = "freebsd", + target_os = "illumos", + target_os = "solaris", + )))] debug_assert!( // if we couldn't get the thread ID because we're dropping the local // data, skip the assertion --- the `Drop` impl is not going to be