From b8552c65d593422ffcc0ddc2dc3e1d8080a4cad1 Mon Sep 17 00:00:00 2001
From: Eliza Weisman <eliza@elizas.website>
Date: Sun, 11 Aug 2024 12:00:47 -0700
Subject: [PATCH 1/7] ci: add a basic illumos test job

This commit adds a simple test job for illumos using Buildomat. I'd like
to make some more improvements and add docs before merging this, but I
want to see if it even works first.
---
 .github/buildomat/jobs/test-full.sh | 44 +++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)
 create mode 100644 .github/buildomat/jobs/test-full.sh

diff --git a/.github/buildomat/jobs/test-full.sh b/.github/buildomat/jobs/test-full.sh
new file mode 100644
index 00000000000..40f9920c631
--- /dev/null
+++ b/.github/buildomat/jobs/test-full.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+#:
+#: name = "illumos-test-full"
+#: variety = "basic"
+#: target = "omnios-r151038"
+#: rust_toolchain = "stable"
+
+# basic illumos test job with --features full.
+# TODO(eliza): consider splitting the "build" and "test" jobs into separate
+# buildomat jobs, so that the build and test jobs can fail independently.
+
+set -o errexit
+set -o pipefail
+set -o xtrace
+
+
+# These are the same env vars that are set for all GitHub Actions CI jobs.
+export RUSTFLAGS="-Dwarnings"
+export RUST_BACKTRACE=1
+# We're building once, so there's no need to incur the overhead of an
+# incremental build.
+export CARGO_INCREMENTAL=0
+
+# NOTE: Currently we use the latest cargo-nextest release, since this is what
+# the Linux CI jobs do. If we ever start pinning our nextest version, this
+# should be changed to match that.
+NEXTEST_VERSION='latest'
+
+
+curl -sSfL --retry 10 "https://get.nexte.st/$NEXTEST_VERSION/illumos" | gunzip | tar -xvf - -C ~/.cargo/bin
+
+# Print the current test execution environment
+uname -a
+cargo --version
+rustc --version
+
+banner build
+ptime -m cargo test --no-run --all --verbose --features full
+
+banner tests
+ptime -m cargo nextest run --features full
+
+banner doctests
+ptime -m cargo test --doc --verbose --features full

From 38cc55b33887234e720898efdd259ba1e53b223e Mon Sep 17 00:00:00 2001
From: Eliza Weisman <eliza@elizas.website>
Date: Mon, 12 Aug 2024 11:34:06 -0700
Subject: [PATCH 2/7] change `anon_pipe_spawn_echo` to use `printf(1)`

The illumos version of `echo(1)` doesn't support `-n`, so it interprets
it literally as part of the string to echo, making the test fail. I've
changed it to use `printf(1)` with no flags args to get the same
behavior.

I also changed the name of the test, since it doesn't actually spawn
`echo(1)` anymore :)
---
 tokio/tests/net_unix_pipe.rs | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/tokio/tests/net_unix_pipe.rs b/tokio/tests/net_unix_pipe.rs
index 37b8b41bd31..a28303e97c3 100644
--- a/tokio/tests/net_unix_pipe.rs
+++ b/tokio/tests/net_unix_pipe.rs
@@ -457,15 +457,14 @@ async fn anon_pipe_simple_send() -> io::Result<()> {
 }
 
 #[tokio::test]
-async fn anon_pipe_spawn_echo() -> std::io::Result<()> {
+async fn anon_pipe_spawn_printf() -> std::io::Result<()> {
     use tokio::process::Command;
 
     const DATA: &str = "this is some data to write to the pipe";
 
     let (tx, mut rx) = pipe::pipe()?;
 
-    let status = Command::new("echo")
-        .arg("-n")
+    let status = Command::new("printf")
         .arg(DATA)
         .stdout(tx.into_blocking_fd()?)
         .status();

From 1064b29f9485471c9ce5c07d1163ac90d62599b6 Mon Sep 17 00:00:00 2001
From: Eliza Weisman <eliza@elizas.website>
Date: Mon, 12 Aug 2024 13:22:03 -0700
Subject: [PATCH 3/7] fix `uds_stream::epollhup` test

---
 tokio/tests/uds_stream.rs | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/tokio/tests/uds_stream.rs b/tokio/tests/uds_stream.rs
index b8c4e6a8eed..51403718528 100644
--- a/tokio/tests/uds_stream.rs
+++ b/tokio/tests/uds_stream.rs
@@ -406,6 +406,16 @@ async fn epollhup() -> io::Result<()> {
     drop(listener);
 
     let err = connect.await.unwrap_err();
-    assert_eq!(err.kind(), io::ErrorKind::ConnectionReset);
+
+    // On illumos (and presumably other Solaris descendents), `connect(3SOCKET)`
+    // returns `ECONNREFUSED` here rather than `ECONNRESET`.
+    //
+    // See: https://illumos.org/man/3SOCKET/connect
+    let expected_errno = if cfg!(target_os = "illumos") || cfg!(target_os = "solaris") {
+        io::ErrorKind::ConnectionRefused
+    } else {
+        io::ErrorKind::ConnectionReset
+    };
+    assert_eq!(err.kind(), expected_errno);
     Ok(())
 }

From ed03ae90a97f832a7571d14ce3cc6354a12d213f Mon Sep 17 00:00:00 2001
From: Eliza Weisman <eliza@elizas.website>
Date: Mon, 12 Aug 2024 13:23:23 -0700
Subject: [PATCH 4/7] ci: disable nextest failfast

---
 .github/buildomat/jobs/test-full.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/buildomat/jobs/test-full.sh b/.github/buildomat/jobs/test-full.sh
index 40f9920c631..e335aeeb528 100644
--- a/.github/buildomat/jobs/test-full.sh
+++ b/.github/buildomat/jobs/test-full.sh
@@ -38,7 +38,7 @@ banner build
 ptime -m cargo test --no-run --all --verbose --features full
 
 banner tests
-ptime -m cargo nextest run --features full
+ptime -m cargo nextest run --features full --no-fail-fast
 
 banner doctests
 ptime -m cargo test --doc --verbose --features full

From 35c1465555c72e8a5cf0e651e288e5db0713797f Mon Sep 17 00:00:00 2001
From: Eliza Weisman <eliza@elizas.website>
Date: Wed, 14 Aug 2024 11:34:29 -0700
Subject: [PATCH 5/7] make errno assertion less flaky

---
 tokio/tests/uds_stream.rs | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/tokio/tests/uds_stream.rs b/tokio/tests/uds_stream.rs
index 51403718528..cb40cbd1832 100644
--- a/tokio/tests/uds_stream.rs
+++ b/tokio/tests/uds_stream.rs
@@ -406,16 +406,17 @@ async fn epollhup() -> io::Result<()> {
     drop(listener);
 
     let err = connect.await.unwrap_err();
-
-    // On illumos (and presumably other Solaris descendents), `connect(3SOCKET)`
-    // returns `ECONNREFUSED` here rather than `ECONNRESET`.
-    //
-    // See: https://illumos.org/man/3SOCKET/connect
-    let expected_errno = if cfg!(target_os = "illumos") || cfg!(target_os = "solaris") {
-        io::ErrorKind::ConnectionRefused
-    } else {
-        io::ErrorKind::ConnectionReset
-    };
-    assert_eq!(err.kind(), expected_errno);
+    let errno = err.kind();
+    assert!(
+        // As far as I can tell, whether we see ECONNREFUSED or ECONNRESET here
+        // seems relatively inconsistent, at least on non-Linux operating
+        // systems. The difference in meaning between these errnos is not
+        // particularly well-defined, so let's just accept either.
+        matches!(
+            errno,
+            io::ErrorKind::ConnectionRefused | io::ErrorKind::ConnectionReset
+        ),
+        "unexpected error kind: {errno:?} (expected ConnectionRefused or ConnectionReset)"
+    );
     Ok(())
 }

From f18d6ed7d4e0724bbe14db5519d7c80b3227a1a9 Mon Sep 17 00:00:00 2001
From: Eliza Weisman <eliza@elizas.website>
Date: Wed, 14 Aug 2024 11:34:55 -0700
Subject: [PATCH 6/7] make io_async_fd tests always drain written amount

---
 tokio/tests/io_async_fd.rs | 29 +++++++++++++++++------------
 1 file changed, 17 insertions(+), 12 deletions(-)

diff --git a/tokio/tests/io_async_fd.rs b/tokio/tests/io_async_fd.rs
index 3ab1cebd884..a9f22a0e3d7 100644
--- a/tokio/tests/io_async_fd.rs
+++ b/tokio/tests/io_async_fd.rs
@@ -135,15 +135,17 @@ fn socketpair() -> (FileDescriptor, FileDescriptor) {
     fds
 }
 
-fn drain(mut fd: &FileDescriptor) {
+fn drain(mut fd: &FileDescriptor, mut amt: usize) {
     let mut buf = [0u8; 512];
-    #[allow(clippy::unused_io_amount)]
-    loop {
+    while amt > 0 {
         match fd.read(&mut buf[..]) {
-            Err(e) if e.kind() == ErrorKind::WouldBlock => break,
+            Err(e) if e.kind() == ErrorKind::WouldBlock => {}
             Ok(0) => panic!("unexpected EOF"),
             Err(e) => panic!("unexpected error: {:?}", e),
-            Ok(_) => continue,
+            Ok(x) => {
+                amt -= x;
+                continue;
+            }
         }
     }
 }
@@ -219,10 +221,10 @@ async fn reset_writable() {
     let mut guard = afd_a.writable().await.unwrap();
 
     // Write until we get a WouldBlock. This also clears the ready state.
-    while guard
-        .try_io(|_| afd_a.get_ref().write(&[0; 512][..]))
-        .is_ok()
-    {}
+    let mut bytes = 0;
+    while let Ok(Ok(amt)) = guard.try_io(|_| afd_a.get_ref().write(&[0; 512][..])) {
+        bytes += amt;
+    }
 
     // Writable state should be cleared now.
     let writable = afd_a.writable();
@@ -234,7 +236,7 @@ async fn reset_writable() {
     }
 
     // Read from the other side; we should become writable now.
-    drain(&b);
+    drain(&b, bytes);
 
     let _ = writable.await.unwrap();
 }
@@ -386,7 +388,10 @@ async fn poll_fns() {
     let afd_b = Arc::new(AsyncFd::new(b).unwrap());
 
     // Fill up the write side of A
-    while afd_a.get_ref().write(&[0; 512]).is_ok() {}
+    let mut bytes = 0;
+    while let Ok(amt) = afd_a.get_ref().write(&[0; 512]) {
+        bytes += amt;
+    }
 
     let waker = TestWaker::new();
 
@@ -446,7 +451,7 @@ async fn poll_fns() {
     }
 
     // Make it writable now
-    drain(afd_b.get_ref());
+    drain(afd_b.get_ref(), bytes);
 
     // now we should be writable (ie - the waker for poll_write should still be registered after we wake the read side)
     let _ = write_fut.await;

From 347b921a3df3ad7738176e67424c61948a5e2996 Mon Sep 17 00:00:00 2001
From: Eliza Weisman <eliza@elizas.website>
Date: Wed, 14 Aug 2024 12:54:28 -0700
Subject: [PATCH 7/7] task: illumos/Solaris have thread-local weirdness

## Motivation

In `tokio::task::local`, there's a
`LocalState::assert_called_from_owner_thread` function, which checks
that the caller's thread ID matches that of the thread on which the
`LocalSet` was created. This assertion is disabled on FreeBSD and
OpenBSD, with a comment indicating that this is because those systems
have "some weirdness around thread-local destruction". If memory serves,
I believe the "weirdness" was related to the order in which
thread-locals are destroyed relative to each other, or something along
those lines.

Upon running the tests on illumos, there appears to be [a similar
panic][1] in the `task_local_set_in_thread_local` due to this assertion
assertion while dropping a `LocalSet` which lives in a thread-local.
This leads me to believe that illumos, and presumably Solaris, also
exhibit the same "weirdness". This wouldn't be too surprising, given the
shared heritage of those systems.

## Solution

This commit adds `target_os="illumos"` and `target_os="solaris"` to the
`cfg` attribute that disables the assertion on systems determined to
have weirdness. This fixes the test panicking on illumos.

In the future, it might be worthwhile to look into changign the
assertion to only be disabled on weirdness-having systems _while
dropping the `LocalSet`_, rather than all the time. That way, we can
still check other accesses. But, I wanted to make the minimum change
necessary to fix it here before messing around with that.

[1]: https://buildomat.eng.oxide.computer/wg/0/details/01J592RB0JR203RGGN0RYHJHMY/CHieo1Uee7qzRVyp811YBl0MvXGO3i0QA9ezPaFWj6hf6P3P/01J592RSWCNX1MCFYGW74AHVH6#S1954
---
 tokio/src/task/local.rs | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/tokio/src/task/local.rs b/tokio/src/task/local.rs
index 08d89c49c03..baab6805b6c 100644
--- a/tokio/src/task/local.rs
+++ b/tokio/src/task/local.rs
@@ -1160,9 +1160,14 @@ impl LocalState {
 
     #[track_caller]
     fn assert_called_from_owner_thread(&self) {
-        // FreeBSD has some weirdness around thread-local destruction.
+        // BSDs and Solarises has some weirdness around thread-local destruction.
         // TODO: remove this hack when thread id is cleaned up
-        #[cfg(not(any(target_os = "openbsd", target_os = "freebsd")))]
+        #[cfg(not(any(
+            target_os = "openbsd",
+            target_os = "freebsd",
+            target_os = "illumos",
+            target_os = "solaris",
+        )))]
         debug_assert!(
             // if we couldn't get the thread ID because we're dropping the local
             // data, skip the assertion --- the `Drop` impl is not going to be