Skip to content

Commit

Permalink
feat(gpud): rename/add "run --web-enable --enable-auto-update" flags
Browse files Browse the repository at this point in the history
Signed-off-by: Gyuho Lee <[email protected]>
  • Loading branch information
gyuho committed Aug 22, 2024
1 parent 0b331cf commit fbf83db
Show file tree
Hide file tree
Showing 6 changed files with 36 additions and 30 deletions.
15 changes: 11 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,8 @@ sudo gpud login --token <LEPTON_AI_TOKEN>

To access the local web UI, open https://localhost:15132 in your browser.

If GPUd is run with `gpud up`, you may disable this local web UI by adding `FLAGS="--web-enable=false"` to the `/etc/default/gpud` environment file and restarting the service.

#### If your system doesn't have systemd

To run on Mac (without systemd):
Expand All @@ -91,9 +93,6 @@ sudo rm /usr/sbin/gpud
sudo rm /etc/systemd/system/gpud.service
```

## Integration

For users looking to set up a platform to collect and process data from gpud, please refer to [INTEGRATION](./docs/INTEGRATION.md).
## Key Features

- Monitor critical GPU and GPU fabric metrics (power, temperature).
Expand All @@ -103,14 +102,22 @@ For users looking to set up a platform to collect and process data from gpud, pl

Check out [*components*](./docs/COMPONENTS.md) for a detailed list of components and their features.

## Integration

For users looking to set up a platform to collect and process data from gpud, please refer to [INTEGRATION](./docs/INTEGRATION.md).

## FAQs

### Does GPUd send data to lepton.ai?

GPUd collects a small anonymous usage signal by default to help the engineering team better understand usage frequencies. The data is strictly anonymized and **does not contain any sensitive data**. You can disable this behavior by setting `GPUD_NO_USAGE_STATS=true`. If GPUd is run with systemd (default option for the `gpud up` command), you can add `GPUD_NO_USAGE_STATS=true` to the `/etc/default/gpud` environment file.
GPUd collects a small anonymous usage signal by default to help the engineering team better understand usage frequencies. The data is strictly anonymized and **does not contain any sensitive data**. You can disable this behavior by setting `GPUD_NO_USAGE_STATS=true`. If GPUd is run with systemd (default option for the `gpud up` command), you can add the line `GPUD_NO_USAGE_STATS=true` to the `/etc/default/gpud` environment file and restart the service.

If you opt in to logging in to the Lepton AI platform, GPUd periodically sends system runtime information about the host to the platform so that it can provide more helpful GPU health states. This information covers only system workload and health, and contains no user data. The data is sent via secure channels.

### How to update GPUd?

GPUd is still in active development, regularly releasing new versions for critical bug fixes and new features. We strongly recommend always being on the latest version of GPUd. When GPUd is registered with the Lepton platform, the platform will automatically update GPUd to the latest version. Auto-update is enabled by default. To disable auto-updates, if GPUd is run with systemd (default option for the `gpud up` command), you may add the flag `FLAGS="--enable-auto-update=false"` to the `/etc/default/gpud` environment file and restart the service.

## Learn more

- [Why GPUd](./docs/WHY.md)
Expand Down
34 changes: 17 additions & 17 deletions cmd/gpud/command/command.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,12 @@ var (

retentionPeriod time.Duration

webDisable bool
webEnable bool
webAdmin bool
webRefreshPeriod time.Duration

tailLines int
disableArchive bool
tailLines int
createArchive bool

enableAutoUpdate bool
)
Expand Down Expand Up @@ -138,7 +138,7 @@ sudo rm /etc/systemd/system/gpud.service
},
&cli.BoolFlag{
Name: "pprof",
Usage: "enable pprof",
Usage: "enable pprof (default: false)",
Destination: &pprof,
},
&cli.DurationFlag{
Expand All @@ -147,14 +147,14 @@ sudo rm /etc/systemd/system/gpud.service
Destination: &retentionPeriod,
Value: config.DefaultRetentionPeriod.Duration,
},
&cli.BoolFlag{
Name: "web-disable",
Usage: "disable local web interface",
Destination: &webDisable,
&cli.BoolTFlag{
Name: "web-enable",
Usage: "enable local web interface (default: true)",
Destination: &webEnable,
},
&cli.BoolFlag{
Name: "web-admin",
Usage: "enable admin interface",
Usage: "enable admin interface (default: false)",
Destination: &webAdmin,
},
&cli.DurationFlag{
Expand All @@ -170,7 +170,7 @@ sudo rm /etc/systemd/system/gpud.service
},
&cli.BoolTFlag{
Name: "enable-auto-update",
Usage: "enable auto update",
Usage: "enable auto update of gpud (default: true)",
Destination: &enableAutoUpdate,
},
},
Expand Down Expand Up @@ -210,11 +210,11 @@ sudo rm /etc/systemd/system/gpud.service
Action: cmdReleaseGenKey,
Flags: []cli.Flag{
cli.BoolFlag{
Name: "root",
Name: "root (default: false)",
Usage: "generate root key",
},
cli.BoolFlag{
Name: "signing",
Name: "signing (default: false)",
Usage: "generate signing key",
},
cli.StringFlag{
Expand Down Expand Up @@ -339,10 +339,10 @@ cat summary.txt
Action: cmdDiagnose,
Aliases: []string{"d"},
Flags: []cli.Flag{
&cli.BoolFlag{
Name: "disable-archive",
Usage: "disable archive of diagnose information",
Destination: &disableArchive,
&cli.BoolTFlag{
Name: "create-archive (default: true)",
Usage: "create .tar archive of diagnose information",
Destination: &createArchive,
},
},
},
Expand All @@ -359,7 +359,7 @@ cat summary.txt
},
&cli.BoolFlag{
Name: "debug",
Usage: "enable debug mode",
Usage: "enable debug mode (default: false)",
Destination: &debug,
},
},
Expand Down
2 changes: 1 addition & 1 deletion cmd/gpud/command/diagnose.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ func cmdDiagnose(cliContext *cli.Context) error {

ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
defer cancel()
err := diagnose.Run(ctx)
err := diagnose.Run(ctx, diagnose.WithCreateArchive(createArchive))
if err != nil {
return err
}
Expand Down
5 changes: 2 additions & 3 deletions cmd/gpud/command/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,8 @@ func cmdRun(cliContext *cli.Context) error {
cfg.RetentionPeriod = metav1.Duration{Duration: retentionPeriod}
cfg.Web.SincePeriod = metav1.Duration{Duration: retentionPeriod}
}
if webDisable {
cfg.Web.Enable = false
}

cfg.Web.Enable = webEnable
if webAdmin {
cfg.Web.Admin = true
}
Expand Down
8 changes: 4 additions & 4 deletions components/diagnose/diagnose.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ import (
)

type Op struct {
disableArchive bool
createArchive bool
}

type OpOption func(*Op)
Expand All @@ -32,9 +32,9 @@ func (op *Op) applyOpts(opts []OpOption) error {
return nil
}

func WithDisableArchive(b bool) OpOption {
func WithCreateArchive(b bool) OpOption {
return func(op *Op) {
op.disableArchive = b
op.createArchive = b
}
}

Expand Down Expand Up @@ -393,7 +393,7 @@ func run(ctx context.Context, dir string, opts ...OpOption) error {
return err
}

if !op.disableArchive {
if op.createArchive {
// tar the directory into a single file
tarFileName := dir + ".tar"
if err := tarDirectory(dir, tarFileName); err != nil {
Expand Down
2 changes: 1 addition & 1 deletion e2e/e2e_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ func TestGpudHealthzInfo(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
defer cancel()

cmd := exec.CommandContext(ctx, os.Getenv("GPUD_BIN"), "run", "--log-level=debug", "--web-disable", "--annotations", fmt.Sprintf("{%q:%q}", randKey, randVal), fmt.Sprintf("--listen-address=%s", ep))
cmd := exec.CommandContext(ctx, os.Getenv("GPUD_BIN"), "run", "--log-level=debug", "--web-enable=false", "--enable-auto-update=false", "--annotations", fmt.Sprintf("{%q:%q}", randKey, randVal), fmt.Sprintf("--listen-address=%s", ep))
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr

Expand Down

0 comments on commit fbf83db

Please sign in to comment.