Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
114759: server: fix error handling in status server GetFiles r=xinhaoz a=xinhaoz

Previously, we ignored errors returned by the stat file call in status server's `GetFiles`. This can lead to a panic if the stat call returns an error as we dereference the nil file info object later on without any guards.

This commit patches the nil pointer dereference by returning any error encountered by stat immediately. Note that in the future we should probably change this to just ignore the current file path and continue with the rest of the files in the list. However, this is a good first patch to minimize backported code changes.

Fixes: cockroachdb#114453

Release note (bug fix): GetFiles should not cause a nil pointer dereference.

Co-authored-by: Xin Hao Zhang <[email protected]>
  • Loading branch information
craig[bot] and xinhaoz committed Nov 21, 2023
2 parents 88233d4 + b35dd80 commit 96987e9
Show file tree
Hide file tree
Showing 4 changed files with 112 additions and 4 deletions.
1 change: 1 addition & 0 deletions pkg/server/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -433,6 +433,7 @@ go_test(
"critical_nodes_test.go",
"distsql_flows_test.go",
"drain_test.go",
"get_local_files_test.go",
"graphite_test.go",
"grpc_gateway_test.go",
"helpers_test.go",
Expand Down
99 changes: 99 additions & 0 deletions pkg/server/get_local_files_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
// Copyright 2023 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package server

import (
"os"
"path/filepath"
"testing"

"github.com/cockroachdb/cockroach/pkg/server/serverpb"
"github.com/cockroachdb/cockroach/pkg/testutils"
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/errors"
"github.com/stretchr/testify/require"
)

// TestGetLocalFiles tests that retrieving local heap and goroutine profiles
// returns and reads files when they exist and returns the expected error
// when the file on encountered errors.
// Note that there is more extensive end to end testing of this done in
// pkg/server/storage_api/files_test.go. This test was created to test the
// error handling in response to #114453.
func TestGetLocalFiles(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)

// We'll read a file successfully first to ensure the read logic is working.
tempDir, cleanupFn := testutils.TempDir(t)
defer cleanupFn()

testHeapDir := filepath.Join(tempDir, "heap_profiles")
if err := os.MkdirAll(testHeapDir, os.ModePerm); err != nil {
t.Fatal(err)
}

t.Run("read files successfully", func(t *testing.T) {
testHeapFile := filepath.Join(testHeapDir, "im-in-the-heap-dir")
if err := os.WriteFile(testHeapFile, []byte("heap stuff"), 0644); err != nil {
t.Fatal(err)
}
req := &serverpb.GetFilesRequest{
NodeId: "local", Type: serverpb.FileType_HEAP, Patterns: []string{"*"}}
res, err := getLocalFiles(req, testHeapDir, "", os.Stat, os.ReadFile)
require.NoError(t, err)
require.Equal(t, 1, len(res.Files))
require.Equal(t, "im-in-the-heap-dir", res.Files[0].Name)
})

t.Run("stat file error", func(t *testing.T) {
testHeapFile := filepath.Join(testHeapDir, "im-in-the-heap-dir")
if err := os.WriteFile(testHeapFile, []byte("heap stuff"), 0644); err != nil {
t.Fatal(err)
}
// Test that getLocalFiles returns an error when the file cannot be read with stat.
statFileWithErr := func(name string) (os.FileInfo, error) {
return nil, errors.Errorf("stat error")
}
req := &serverpb.GetFilesRequest{
NodeId: "local", Type: serverpb.FileType_HEAP, Patterns: []string{"*"}}
_, err := getLocalFiles(req, testHeapDir, "", statFileWithErr, os.ReadFile)
require.ErrorContains(t, err, "stat error")
})

t.Run("read file error", func(t *testing.T) {
testHeapFile := filepath.Join(testHeapDir, "im-in-the-heap-dir")
if err := os.WriteFile(testHeapFile, []byte("heap stuff"), 0644); err != nil {
t.Fatal(err)
}
// Test that getLocalFiles returns an error when the file cannot be read.
readFileWithErr := func(name string) ([]byte, error) {
return nil, errors.Errorf("read error")
}
req := &serverpb.GetFilesRequest{
NodeId: "local", Type: serverpb.FileType_HEAP, Patterns: []string{"*"}}
_, err := getLocalFiles(req, testHeapDir, "", os.Stat, readFileWithErr)
require.ErrorContains(t, err, "read error")
})

t.Run("dirs not implemented", func(t *testing.T) {
req := &serverpb.GetFilesRequest{
NodeId: "local", Type: serverpb.FileType_HEAP, Patterns: []string{"*"}}
_, err := getLocalFiles(req, "", "nonexistent", os.Stat, os.ReadFile)
require.ErrorContains(t, err, "dump directory not configured")

req = &serverpb.GetFilesRequest{
NodeId: "local", Type: serverpb.FileType_GOROUTINES, Patterns: []string{"*"}}
_, err = getLocalFiles(req, "nonexistent", "", os.Stat, os.ReadFile)
require.ErrorContains(t, err, "dump directory not configured")
})
}
3 changes: 2 additions & 1 deletion pkg/server/status.go
Original file line number Diff line number Diff line change
Expand Up @@ -1194,7 +1194,8 @@ func (s *statusServer) GetFiles(
return status.GetFiles(ctx, req)
}

return getLocalFiles(req, s.sqlServer.cfg.HeapProfileDirName, s.sqlServer.cfg.GoroutineDumpDirName)
return getLocalFiles(req, s.sqlServer.cfg.HeapProfileDirName, s.sqlServer.cfg.GoroutineDumpDirName,
os.Stat, os.ReadFile)
}

// checkFilePattern checks if a pattern is acceptable for the GetFiles call.
Expand Down
13 changes: 10 additions & 3 deletions pkg/server/status_local_file_retrieval.go
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,11 @@ func stacksLocal(req *serverpb.StacksRequest) (*serverpb.JSONResponse, error) {
// getLocalFiles retrieves the requested files for the local node. This method
// returns a gRPC error to the caller.
func getLocalFiles(
req *serverpb.GetFilesRequest, heapProfileDirName string, goroutineDumpDirName string,
req *serverpb.GetFilesRequest,
heapProfileDirName string,
goroutineDumpDirName string,
statFileFn func(string) (os.FileInfo, error),
readFileFn func(string) ([]byte, error),
) (*serverpb.GetFilesResponse, error) {
var dir string
switch req.Type {
Expand All @@ -154,10 +158,13 @@ func getLocalFiles(
}

for _, path := range filepaths {
fileinfo, _ := os.Stat(path)
fileinfo, err := statFileFn(path)
if err != nil {
return nil, status.Errorf(codes.Internal, err.Error())
}
var contents []byte
if !req.ListOnly {
contents, err = os.ReadFile(path)
contents, err = readFileFn(path)
if err != nil {
return nil, status.Errorf(codes.Internal, err.Error())
}
Expand Down

0 comments on commit 96987e9

Please sign in to comment.