diff --git a/pkg/cmd/roachtest/tests/db_console.go b/pkg/cmd/roachtest/tests/db_console.go index 8fe2d1a49edb..359b278997ab 100644 --- a/pkg/cmd/roachtest/tests/db_console.go +++ b/pkg/cmd/roachtest/tests/db_console.go @@ -114,7 +114,7 @@ func (d *dbConsoleCypressTest) RunTest(ctx context.Context, targetNode int, l *l url := fmt.Sprintf("https://%s", adminUIAddrs[0]) require.NoError(d.t, rtCluster.RunE(ctx, option.WithNodes(workloadNode), "mkdir", "-p", d.artifactPath)) dockerRun := fmt.Sprintf( - `docker run -v %s:/e2e/artifacts %s --config baseUrl=%s,screenshotsFolder=/e2e/artifacts,videosFolder=/e2e/artifacts %s`, + `docker run -e NO_COLOR=1 -v %s:/e2e/artifacts %s --config baseUrl=%s,screenshotsFolder=/e2e/artifacts,videosFolder=/e2e/artifacts %s`, d.artifactPath, d.imageName, url, specStr) // If the Docker run fails, get the test failure artifacts and write them to // roachtest's artifact directory. @@ -123,7 +123,7 @@ func (d *dbConsoleCypressTest) RunTest(ctx context.Context, targetNode int, l *l if mkDirErr := os.MkdirAll(testArtifactsDir, 0777); mkDirErr != nil { d.t.Fatal(mkDirErr) } - require.NoError(d.t, rtCluster.Get(ctx, d.t.L(), d.artifactPath, testArtifactsDir, workloadNode)) + require.NoError(d.t, rtCluster.Get(context.Background(), d.t.L(), d.artifactPath, testArtifactsDir, workloadNode)) d.t.Fatal(err) } } @@ -195,11 +195,16 @@ func (d *dbConsoleCypressTest) writeCypressFilesToWorkloadNode(ctx context.Conte } func registerDbConsole(r registry.Registry) { + // Explicitly set CockroachBinary to registry.StandardCockroach to ensure that a binary + // containing db console is used. Currently, registry.RuntimeAssertionsCockroach + // is built using cockroach-short and the default of registry.RandomizedCockroach + // causes the tests to be flaky r.Add(registry.TestSpec{ Name: "db-console/mixed-version-cypress", Owner: registry.OwnerObservability, Cluster: r.MakeClusterSpec(5, spec.WorkloadNode()), CompatibleClouds: registry.AllClouds, + CockroachBinary: registry.StandardCockroach, Suites: registry.Suites(registry.Nightly), Randomized: false, Run: runDbConsoleCypressMixedVersions, @@ -210,6 +215,7 @@ func registerDbConsole(r registry.Registry) { Owner: registry.OwnerObservability, Cluster: r.MakeClusterSpec(4, spec.WorkloadNode()), CompatibleClouds: registry.AllClouds, + CockroachBinary: registry.StandardCockroach, Suites: registry.Suites(registry.Nightly), Randomized: false, Run: runDbConsoleCypress, diff --git a/pkg/cmd/roachtest/tests/gossip.go b/pkg/cmd/roachtest/tests/gossip.go index c35090930d9f..570ea1c99155 100644 --- a/pkg/cmd/roachtest/tests/gossip.go +++ b/pkg/cmd/roachtest/tests/gossip.go @@ -12,6 +12,8 @@ import ( "net" "net/url" "os" + "path/filepath" + "runtime/trace" "sort" "strconv" "strings" @@ -54,8 +56,14 @@ SELECT node_id WHERE expiration > now(); ` + tBeforePing := timeutil.Now() db := c.Conn(ctx, t.L(), node) defer db.Close() + require.NoError(t, db.Ping()) + tAfterPing := timeutil.Now() + if pingDur := tAfterPing.Sub(tBeforePing); pingDur > 20*time.Second { + t.L().Printf("sql connection ready after %.2fs", pingDur.Seconds()) + } rows, err := db.Query(query) if err != nil { @@ -68,7 +76,11 @@ SELECT node_id require.NotZero(t, nodeID) nodes = append(nodes, nodeID) } + require.NoError(t, rows.Err()) sort.Ints(nodes) + if scanDur := timeutil.Since(tAfterPing); scanDur > 20*time.Second { + t.L().Printf("query processed after %.2fs", scanDur.Seconds()) + } return nodes } @@ -145,10 +157,24 @@ SELECT node_id waitForGossip(0) nodes := c.All() + for j := 0; j < 10; j++ { + traceFile := filepath.Join(t.ArtifactsDir(), "trace_"+strconv.Itoa(j)+".bin") + f, err := os.Create(traceFile) + require.NoError(t, err) + if err := trace.Start(f); err != nil { + _ = f.Close() + f = nil + _ = os.Remove(traceFile) + } deadNode := nodes.RandNode()[0] c.Stop(ctx, t.L(), option.DefaultStopOpts(), c.Node(deadNode)) waitForGossip(deadNode) + if f != nil { + trace.Stop() + _ = f.Close() + t.L().Printf("execution trace: %s", traceFile) + } c.Start(ctx, t.L(), startOpts, install.MakeClusterSettings(), c.Node(deadNode)) } } diff --git a/pkg/kv/txn.go b/pkg/kv/txn.go index ee0082a3c941..b5ac7245f710 100644 --- a/pkg/kv/txn.go +++ b/pkg/kv/txn.go @@ -11,6 +11,7 @@ import ( "math" "time" + "github.com/cockroachdb/cockroach/pkg/base" "github.com/cockroachdb/cockroach/pkg/kv/kvpb" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/closedts" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/concurrency/isolation" @@ -23,6 +24,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/util/hlc" "github.com/cockroachdb/cockroach/pkg/util/log" "github.com/cockroachdb/cockroach/pkg/util/protoutil" + "github.com/cockroachdb/cockroach/pkg/util/retry" "github.com/cockroachdb/cockroach/pkg/util/syncutil" "github.com/cockroachdb/cockroach/pkg/util/timeutil" "github.com/cockroachdb/cockroach/pkg/util/uuid" @@ -1045,7 +1047,10 @@ func (e *AutoCommitError) Error() string { func (txn *Txn) exec(ctx context.Context, fn func(context.Context, *Txn) error) (err error) { // Run fn in a retry loop until we encounter a success or // error condition this loop isn't capable of handling. - for attempt := 1; ; attempt++ { + retryOpts := base.DefaultRetryOptions() + retryOpts.InitialBackoff = 20 * time.Millisecond + retryOpts.MaxBackoff = 200 * time.Millisecond + for r := retry.Start(retryOpts); r.Next(); { if err := ctx.Err(); err != nil { return errors.Wrap(err, "txn exec") } @@ -1115,6 +1120,8 @@ func (txn *Txn) exec(ctx context.Context, fn func(context.Context, *Txn) error) // txn.db.ctx.Settings == nil is only expected in tests. maxRetries = int(MaxInternalTxnAutoRetries.Get(&txn.db.ctx.Settings.SV)) } + // Add 1 because r.CurrentAttempt() starts at 0. + attempt := r.CurrentAttempt() + 1 if attempt > maxRetries { // If the retries limit has been exceeded, rollback and return an error. rollbackErr := txn.Rollback(ctx)