Skip to content

Commit

Permalink
fix(server-manager): kill orphaned cartesi-machine
Browse files Browse the repository at this point in the history
Adds a workaround for a problem in server-manager where the cartesi-machine keeps running after server-manager exits. For more information, check #201.
  • Loading branch information
torives committed Jan 31, 2024
1 parent 81421d4 commit 5633df4
Showing 1 changed file with 57 additions and 4 deletions.
61 changes: 57 additions & 4 deletions internal/services/server-manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,11 @@ type ServerManager struct {

// Environment variables.
Env []string

// PID of the Cartesi Machine spawned by the server-manager. Used to manually
// stop the orphaned process after server-manager exits. For more information,
// check https://github.com/cartesi/server-manager/issues/18
machinePid int
}

const waitDelay = 200 * time.Millisecond
Expand All @@ -38,25 +43,39 @@ func (s ServerManager) Start(ctx context.Context, ready chan<- struct{}) error {
cmd.Env = s.Env
cmd.Stderr = newLineWriter(commandLogger{s.Name})
cmd.Stdout = newLineWriter(commandLogger{s.Name})
// Without a delay, cmd.Wait() will block forever waiting for the I/O pipes
// to be closed
cmd.WaitDelay = waitDelay
cmd.Cancel = func() error {
err := cmd.Process.Signal(syscall.SIGTERM)
if err != nil {
msg := "failed to send SIGTERM to %v: %v\n"
config.WarningLogger.Printf(msg, s, err)
config.WarningLogger.Printf("failed to send SIGTERM to %v: %v\n", s, err)
}
// If we successfully obtained the PID, kill the orphaned cartesi-machine process
if s.machinePid != 0 {
if err := syscall.Kill(s.machinePid, syscall.SIGTERM); err != nil {
config.WarningLogger.Printf("failed to kill cartesi-machine process: %v\n", err)
}
}
return err
}

if err := cmd.Start(); err != nil {
return err
}

go s.pollTcp(ctx, ready)
err := cmd.Run()
go s.storeCartesiMachinePid(ctx, cmd.Process.Pid)

err := cmd.Wait()

if ctx.Err() != nil {
return ctx.Err()
}
return err
}

// Blocks until the service is ready or the context is canceled.
// Blocks until the service is ready or the context is canceled
func (s ServerManager) pollTcp(ctx context.Context, ready chan<- struct{}) {
ctx, cancel := context.WithCancel(ctx)
defer cancel()
Expand All @@ -76,6 +95,40 @@ func (s ServerManager) pollTcp(ctx context.Context, ready chan<- struct{}) {
}
}

// storeCartesiMachinePid looks up the child process of ppid and records its
// PID in s.machinePid.
//
// The lookup is retried, waiting one second between attempts, until it
// succeeds, ctx is canceled, or DefaultServiceTimeout elapses, whichever
// comes first. The outcome is reported through the debug logger only.
func (s *ServerManager) storeCartesiMachinePid(ctx context.Context, ppid int) {
	pollCtx, stop := context.WithTimeout(ctx, DefaultServiceTimeout)
	defer stop()

	for {
		if pid, lookupErr := getChildPid(ppid); lookupErr == nil {
			config.DebugLogger.Println("stored cartesi-machine PID")
			s.machinePid = pid
			return
		}

		// The lookup failed: pause before the next attempt, giving up as soon
		// as the context is canceled or the timeout expires.
		retry := time.After(time.Second)
		select {
		case <-pollCtx.Done():
			config.DebugLogger.Println("failed to store cartesi-machine PID")
			return
		case <-retry:
		}
	}
}

// String returns the service name, so a ServerManager prints as its Name when
// formatted with verbs such as %v.
func (s ServerManager) String() string {
	return s.Name
}

// getChildPid returns the PID of a child of the process identified by pid, as
// reported by `pgrep -P <pid>`.
//
// Only the first PID in pgrep's output is parsed, so when the parent has
// several children the remaining ones are ignored.
//
// An error is returned when pgrep cannot be executed or exits with a non-zero
// status, or when its output does not start with a decimal PID. The underlying
// error is wrapped, so callers can inspect it with errors.Is and errors.As
// (for example to detect *exec.ExitError or exec.ErrNotFound). On error the
// returned PID is always 0.
func getChildPid(pid int) (int, error) {
	// CombinedOutput is used so that anything pgrep prints on stderr ends up in
	// the error message below.
	output, err := exec.Command("pgrep", "-P", fmt.Sprint(pid)).CombinedOutput()
	if err != nil {
		return 0, fmt.Errorf("failed to exec pgrep: %w: %v", err, string(output))
	}

	var childPid int
	if _, err := fmt.Sscanf(string(output), "%d\n", &childPid); err != nil {
		return 0, fmt.Errorf("failed to parse pid: %w", err)
	}
	return childPid, nil
}

0 comments on commit 5633df4

Please sign in to comment.