Skip to content

Commit

Permalink
chore: Add base image (#219)
Browse files Browse the repository at this point in the history
* chore: Add base image

Signed-off-by: Ce Gao <[email protected]>

* chore: Merge some operations into the base image

Signed-off-by: Ce Gao <[email protected]>

* chore: Add some dev tools

Signed-off-by: Ce Gao <[email protected]>

* fix: Fix lint

Signed-off-by: Ce Gao <[email protected]>

* fix: Fix test cases

Signed-off-by: Ce Gao <[email protected]>

* fix: Fix test cases

Signed-off-by: Ce Gao <[email protected]>

* fix: Use a email group

Signed-off-by: Ce Gao <[email protected]>
  • Loading branch information
gaocegege authored May 31, 2022
1 parent 855a5f5 commit 8e1e498
Show file tree
Hide file tree
Showing 12 changed files with 159 additions and 84 deletions.
2 changes: 2 additions & 0 deletions base-images/.dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
build.sh
*.Dockerfile
26 changes: 26 additions & 0 deletions base-images/build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#!/usr/bin/env bash
#
# Builds and pushes the envd base images (the CUDA variant and the plain
# Ubuntu variant) for linux/x86_64 and linux/arm64 with docker buildx.
#
# Optional environment:
#   HTTP_PROXY, HTTPS_PROXY  forwarded to both builds as build args
#                            (empty when unset).

# Stop at the first failing command: without this a failed `cd` or a failed
# first build would still be followed by further `--push` builds, and the
# script would always exit with status 0.
set -euo pipefail

ROOT_DIR="$(dirname "$0")"

# Values shared by both image builds.
BUILDER=cuda
ENVD_VERSION=0.0.1-alpha.5
ENVD_SSH_IMAGE=ghcr.io/tensorchord/envd-ssh-from-scratch
PLATFORMS=linux/x86_64,linux/arm64

cd "${ROOT_DIR}"
# ubuntu 22.04 builds require a moby/buildkit version greater than 0.8.1
if ! docker buildx inspect "${BUILDER}"; then
    docker buildx create --use --platform linux/x86_64,linux/arm64,linux/ppc64le --driver-opt image=moby/buildkit:v0.10.3 --name "${BUILDER}" --node "${BUILDER}"
fi
# --builder is passed explicitly because `create --use` only selects the
# builder when it is freshly created; an already existing builder may not be
# the currently selected one.
docker buildx build --builder "${BUILDER}" \
    --build-arg IMAGE_NAME=docker.io/nvidia/cuda \
    --build-arg ENVD_VERSION="${ENVD_VERSION}" \
    --build-arg ENVD_SSH_IMAGE="${ENVD_SSH_IMAGE}" \
    --build-arg HTTP_PROXY="${HTTP_PROXY:-}" \
    --build-arg HTTPS_PROXY="${HTTPS_PROXY:-}" \
    --pull --push --platform "${PLATFORMS}" \
    -t gaocegege/python:3.8-ubuntu20.04-cuda11.6-cudnn8 \
    -f python3.8-ubuntu20.04-cuda11.6.Dockerfile .
docker buildx build --builder "${BUILDER}" \
    --build-arg ENVD_VERSION="${ENVD_VERSION}" \
    --build-arg ENVD_SSH_IMAGE="${ENVD_SSH_IMAGE}" \
    --build-arg HTTP_PROXY="${HTTP_PROXY:-}" \
    --build-arg HTTPS_PROXY="${HTTPS_PROXY:-}" \
    --pull --push --platform "${PLATFORMS}" \
    -t gaocegege/python:3.8-ubuntu20.04 \
    -f python3.8-ubuntu20.04.Dockerfile .
cd - > /dev/null
84 changes: 84 additions & 0 deletions base-images/python3.8-ubuntu20.04-cuda11.6.Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
# envd base image: CUDA 11.6.2 runtime on Ubuntu 20.04, extended with the CUDA
# development packages, python3/pip, common dev tools and the envd-ssh binary.
#
# Build args (passed by build.sh):
#   IMAGE_NAME      repository of the upstream CUDA image
#   ENVD_SSH_IMAGE  repository of the image that ships /usr/bin/envd-ssh
#   ENVD_VERSION    tag of ENVD_SSH_IMAGE to copy envd-ssh from
ARG IMAGE_NAME
ARG ENVD_VERSION
ARG ENVD_SSH_IMAGE
FROM ${IMAGE_NAME}:11.6.2-runtime-ubuntu20.04 AS base

ENV NV_CUDA_LIB_VERSION="11.6.2-1"

# Per-architecture package pins. The final stage selects one of these stages
# through base-${TARGETARCH}.
FROM base AS base-amd64

ENV NV_CUDA_CUDART_DEV_VERSION=11.6.55-1
ENV NV_NVML_DEV_VERSION=11.6.55-1
ENV NV_LIBCUSPARSE_DEV_VERSION=11.7.2.124-1
ENV NV_LIBNPP_DEV_VERSION=11.6.3.124-1
ENV NV_LIBNPP_DEV_PACKAGE="libnpp-dev-11-6=${NV_LIBNPP_DEV_VERSION}"

ENV NV_LIBCUBLAS_DEV_VERSION=11.9.2.110-1
ENV NV_LIBCUBLAS_DEV_PACKAGE_NAME=libcublas-dev-11-6
ENV NV_LIBCUBLAS_DEV_PACKAGE="${NV_LIBCUBLAS_DEV_PACKAGE_NAME}=${NV_LIBCUBLAS_DEV_VERSION}"

ENV NV_NVPROF_VERSION=11.6.124-1
ENV NV_NVPROF_DEV_PACKAGE="cuda-nvprof-11-6=${NV_NVPROF_VERSION}"

ENV NV_LIBNCCL_DEV_PACKAGE_NAME=libnccl-dev
ENV NV_LIBNCCL_DEV_PACKAGE_VERSION=2.12.10-1
ENV NCCL_VERSION=2.12.10-1
ENV NV_LIBNCCL_DEV_PACKAGE="${NV_LIBNCCL_DEV_PACKAGE_NAME}=${NV_LIBNCCL_DEV_PACKAGE_VERSION}+cuda11.6"

FROM base AS base-arm64

ENV NV_CUDA_CUDART_DEV_VERSION=11.6.55-1
ENV NV_NVML_DEV_VERSION=11.6.55-1
ENV NV_LIBCUSPARSE_DEV_VERSION=11.7.2.124-1
ENV NV_LIBNPP_DEV_VERSION=11.6.3.124-1
ENV NV_LIBNPP_DEV_PACKAGE="libnpp-dev-11-6=${NV_LIBNPP_DEV_VERSION}"

ENV NV_LIBCUBLAS_DEV_PACKAGE_NAME=libcublas-dev-11-6
ENV NV_LIBCUBLAS_DEV_VERSION=11.9.2.110-1
ENV NV_LIBCUBLAS_DEV_PACKAGE="${NV_LIBCUBLAS_DEV_PACKAGE_NAME}=${NV_LIBCUBLAS_DEV_VERSION}"

ENV NV_NVPROF_VERSION=11.6.124-1
ENV NV_NVPROF_DEV_PACKAGE="cuda-nvprof-11-6=${NV_NVPROF_VERSION}"

ENV NV_LIBNCCL_DEV_PACKAGE_NAME=libnccl-dev
ENV NV_LIBNCCL_DEV_PACKAGE_VERSION=2.12.10-1
ENV NCCL_VERSION=2.12.10-1
ENV NV_LIBNCCL_DEV_PACKAGE="${NV_LIBNCCL_DEV_PACKAGE_NAME}=${NV_LIBNCCL_DEV_PACKAGE_VERSION}+cuda11.6"

# Source stage for the envd-ssh binary copied into the final image.
FROM ${ENVD_SSH_IMAGE}:${ENVD_VERSION} AS envd

# TARGETARCH is one of the platform ARGs BuildKit sets automatically.
FROM base-${TARGETARCH}

ARG TARGETARCH

LABEL maintainer="envd-maintainers <[email protected]>"

ENV DEBIAN_FRONTEND=noninteractive

# apt-utils and bash-static are installed first; the second install pulls in
# the pinned CUDA development packages.
RUN apt-get update && \
    apt-get install -y --no-install-recommends --no-install-suggests --fix-missing bash-static \
    apt-utils libtinfo5 libncursesw5 && \
    apt-get install -y --no-install-recommends --no-install-suggests --fix-missing \
    cuda-cudart-dev-11-6=${NV_CUDA_CUDART_DEV_VERSION} \
    cuda-command-line-tools-11-6=${NV_CUDA_LIB_VERSION} \
    cuda-minimal-build-11-6=${NV_CUDA_LIB_VERSION} \
    cuda-libraries-dev-11-6=${NV_CUDA_LIB_VERSION} \
    cuda-nvml-dev-11-6=${NV_NVML_DEV_VERSION} \
    ${NV_NVPROF_DEV_PACKAGE} \
    ${NV_LIBNPP_DEV_PACKAGE} \
    libcusparse-dev-11-6=${NV_LIBCUSPARSE_DEV_VERSION} \
    ${NV_LIBCUBLAS_DEV_PACKAGE} \
    ${NV_LIBNCCL_DEV_PACKAGE} \
    && rm -rf /var/lib/apt/lists/*

# envd dependencies
RUN apt-get update && \
    apt-get install -y --no-install-recommends --no-install-suggests --fix-missing \
    python3 curl openssh-client git tini sudo python3-pip zsh vim \
    && rm -rf /var/lib/apt/lists/*

COPY --from=envd /usr/bin/envd-ssh /var/envd/bin/envd-ssh

# Keep apt from auto upgrading the cublas and nccl packages. See https://gitlab.com/nvidia/container-images/cuda/-/issues/88
RUN apt-mark hold ${NV_LIBCUBLAS_DEV_PACKAGE_NAME} ${NV_LIBNCCL_DEV_PACKAGE_NAME}

ENV LIBRARY_PATH=/usr/local/cuda/lib64/stubs
26 changes: 26 additions & 0 deletions base-images/python3.8-ubuntu20.04.Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# envd base image: Ubuntu 20.04 with python3/pip, common dev tools and the
# envd-ssh binary.
#
# Build args (passed by build.sh):
#   ENVD_SSH_IMAGE  repository of the image that ships /usr/bin/envd-ssh
#   ENVD_VERSION    tag of ENVD_SSH_IMAGE to copy envd-ssh from
ARG ENVD_VERSION
ARG ENVD_SSH_IMAGE
FROM ubuntu:20.04 AS base

# Per-architecture stages; the final stage selects one of them through
# base-${TARGETARCH}.
FROM base AS base-amd64

FROM base AS base-arm64

# Source stage for the envd-ssh binary copied into the final image.
FROM ${ENVD_SSH_IMAGE}:${ENVD_VERSION} AS envd

# TARGETARCH is one of the platform ARGs BuildKit sets automatically.
FROM base-${TARGETARCH}

ARG TARGETARCH

LABEL maintainer="envd-maintainers <[email protected]>"

ENV DEBIAN_FRONTEND=noninteractive

# The second package line holds the envd dependencies.
RUN apt-get update && \
    apt-get install -y --no-install-recommends --no-install-suggests --fix-missing bash-static \
    apt-utils libtinfo5 libncursesw5 \
    python3 curl openssh-client git tini sudo python3-pip zsh vim \
    && rm -rf /var/lib/apt/lists/*

COPY --from=envd /usr/bin/envd-ssh /var/envd/bin/envd-ssh
1 change: 0 additions & 1 deletion cmd/envd/build.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,6 @@ func build(clicontext *cli.Context) error {
"tag": tag,
flag.FlagBuildkitdImage: viper.GetString(flag.FlagBuildkitdImage),
flag.FlagBuildkitdContainer: viper.GetString(flag.FlagBuildkitdContainer),
flag.FlagSSHImage: viper.GetString(flag.FlagSSHImage),
})
logger.Debug("starting build command")

Expand Down
8 changes: 0 additions & 8 deletions cmd/envd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,13 +59,6 @@ func run(args []string) (bool, error) {
Usage: "buildkitd container to use for buildkitd",
Value: "envd_buildkitd",
},
&cli.StringFlag{
Name: flag.FlagSSHImage,
Usage: "Download the envd-ssh image",
// TODO(gaocegege): Use version.Version to generate the right URL.
Value: "ghcr.io/tensorchord/envd-ssh-from-scratch:0.0.1-alpha.5",
Hidden: true,
},
}

app.Commands = []*cli.Command{
Expand Down Expand Up @@ -94,7 +87,6 @@ func run(args []string) (bool, error) {
// TODO(gaocegege): Add a config struct to keep them.
viper.Set(flag.FlagBuildkitdContainer, context.String(flag.FlagBuildkitdContainer))
viper.Set(flag.FlagBuildkitdImage, context.String(flag.FlagBuildkitdImage))
viper.Set(flag.FlagSSHImage, context.String(flag.FlagSSHImage))
return nil
}
return debugEnabled, app.Run(args)
Expand Down
1 change: 0 additions & 1 deletion cmd/envd/up.go
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,6 @@ func up(clicontext *cli.Context) error {
"detach": detach,
flag.FlagBuildkitdImage: viper.GetString(flag.FlagBuildkitdImage),
flag.FlagBuildkitdContainer: viper.GetString(flag.FlagBuildkitdContainer),
flag.FlagSSHImage: viper.GetString(flag.FlagSSHImage),
})
logger.Debug("starting up command")

Expand Down
2 changes: 0 additions & 2 deletions pkg/builder/builder_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,9 @@ var _ = Describe("Builder", func() {
buildContext = "testdata"
tag = "envd-dev:test"
viper.Set(flag.FlagBuildkitdContainer, "envd_buildkitd")
viper.Set(flag.FlagSSHImage, "envd-ssh:latest")
os.Setenv("DOCKER_API_VERSION", "1.41")
DeferCleanup(func() {
viper.Set(flag.FlagBuildkitdContainer, "")
viper.Set(flag.FlagSSHImage, "")
})
})
When("getting the wrong builtkitd address", func() {
Expand Down
1 change: 0 additions & 1 deletion pkg/flag/consts.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,4 @@ const (
FlagContextDir = "context-dir"
FlagBuildkitdImage = "buildkitd-image"
FlagBuildkitdContainer = "buildkitd-container-name"
FlagSSHImage = "envd-ssh-image"
)
22 changes: 8 additions & 14 deletions pkg/lang/ir/compile.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,6 @@ func NewGraph() *Graph {
Language: languageDefault,
CUDA: nil,
CUDNN: nil,
BuiltinSystemPackages: []string{
// TODO(gaocegege): Move them into the base image.
"curl",
"openssh-client",
"git",
"sudo",
"tini",
},

PyPIPackages: []string{},
SystemPackages: []string{},
Expand Down Expand Up @@ -100,17 +92,19 @@ func (g Graph) Compile() (llb.State, error) {
pypiMirrorStage := g.compilePyPIMirror(aptStage)

g.compileJupyter()
// TODO(gaocegege): Make apt update a separate stage to
// parallelize system and user-defined package installation.
builtinSystemStage := g.compileBuiltinSystemPackages(pypiMirrorStage)
builtinSystemStage := pypiMirrorStage
sshStage, err := g.copySSHKey(builtinSystemStage)
if err != nil {
return llb.State{}, errors.Wrap(err, "failed to copy ssh keys")
}
shellStage, err := g.compileShell(builtinSystemStage)
if err != nil {
return llb.State{}, errors.Wrap(err, "failed to compile shell")
}
diffShellStage := llb.Diff(builtinSystemStage, shellStage, llb.WithCustomName("install shell"))
diffSSHStage := llb.Diff(builtinSystemStage, sshStage, llb.WithCustomName("install ssh keys"))
pypiStage := llb.Diff(builtinSystemStage, g.compilePyPIPackages(builtinSystemStage), llb.WithCustomName("install PyPI packages"))
systemStage := llb.Diff(builtinSystemStage, g.compileSystemPackages(builtinSystemStage), llb.WithCustomName("install system packages"))
sshStage, err := g.copyEnvdSSHServerWithKey()

if err != nil {
return llb.State{}, errors.Wrap(err, "failed to copy SSH key")
Expand All @@ -124,11 +118,11 @@ func (g Graph) Compile() (llb.State, error) {
var merged llb.State
if vscodeStage != nil {
merged = llb.Merge([]llb.State{
builtinSystemStage, systemStage, pypiStage, sshStage, *vscodeStage, diffShellStage,
builtinSystemStage, systemStage, diffSSHStage, pypiStage, *vscodeStage, diffShellStage,
}, llb.WithCustomName("merging all components into one"))
} else {
merged = llb.Merge([]llb.State{
builtinSystemStage, systemStage, pypiStage, sshStage, diffShellStage,
builtinSystemStage, systemStage, diffSSHStage, pypiStage, diffShellStage,
}, llb.WithCustomName("merging all components into one"))
}

Expand Down
2 changes: 1 addition & 1 deletion pkg/lang/ir/python.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ func (g Graph) compilePyPIPackages(root llb.State) llb.State {
// Wait until https://github.com/moby/buildkit/commit/31054718bf775bf32d1376fe1f3611985f837584 is released in v0.10.4
sb.WriteString("sudo chown -R 1000:1000 ")
sb.WriteString(filepath.Dir(cacheDir))
sb.WriteString("&& pip install")
sb.WriteString("&& pip install --no-warn-script-location")
for _, pkg := range g.PyPIPackages {
sb.WriteString(fmt.Sprintf(" %s", pkg))
}
Expand Down
68 changes: 12 additions & 56 deletions pkg/lang/ir/system.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,7 @@ import (
"github.com/cockroachdb/errors"
"github.com/moby/buildkit/client/llb"
"github.com/sirupsen/logrus"
"github.com/spf13/viper"
"github.com/tensorchord/envd/pkg/config"
"github.com/tensorchord/envd/pkg/flag"
)

func (g Graph) compileUbuntuAPT(root llb.State) llb.State {
Expand Down Expand Up @@ -53,52 +51,8 @@ func (g Graph) compileRun(root llb.State) llb.State {
return run.Root()
}

// compileBuiltinSystemPackages returns an LLB state built on top of root that
// installs g.BuiltinSystemPackages with apt-get, then creates the envd group
// and user (gid/uid 1000), adds that user to the sudo group, and sets envd as
// the user of the resulting state.
//
// When there are no built-in packages, root is returned unchanged, so the
// user setup below does not run in that case.
func (g Graph) compileBuiltinSystemPackages(root llb.State) llb.State {
// TODO(gaocegege): Refactor it to avoid shell configuration in built-in system packages.
// Do not need to install bash or sh since it is built-in
// g is received by value, so this reassignment only affects this call's
// copy of the Graph.
if g.Shell == shellZSH {
g.BuiltinSystemPackages = append(g.BuiltinSystemPackages, shellZSH)
}

if len(g.BuiltinSystemPackages) == 0 {
return root
}

// Compose the package install command.
// The command is wrapped in `sh -c "..."` so that `&&` is interpreted by a
// shell rather than passed to apt-get as an argument.
var sb strings.Builder
sb.WriteString(
"sh -c \"apt-get update && apt-get install -y --no-install-recommends")
for _, pkg := range g.BuiltinSystemPackages {
sb.WriteString(fmt.Sprintf(" %s", pkg))
}
sb.WriteString("\"")

// apt directories mounted as persistent, shared cache dirs for the install step.
cacheDir := "/var/cache/apt"
cacheLibDir := "/var/lib/apt"

run := root.Run(llb.Shlex(sb.String()),
llb.WithCustomNamef("(built-in packages) apt-get install %s",
strings.Join(g.BuiltinSystemPackages, " ")))
run.AddMount(cacheDir, llb.Scratch(),
llb.AsPersistentCacheDir(g.CacheID(cacheDir), llb.CacheMountShared))
run.AddMount(cacheLibDir, llb.Scratch(),
llb.AsPersistentCacheDir(g.CacheID(cacheLibDir), llb.CacheMountShared))

// TODO(gaocegege): Refactor user to a separate stage.
// Chain three more steps after the install: create group envd (gid 1000),
// create user envd (uid 1000, empty -p value, /bin/sh shell, home dir via -m),
// and add envd to the sudo group.
res := run.
Run(llb.Shlex("groupadd -g 1000 envd"), llb.WithCustomName("create user group envd")).
Run(llb.Shlex("useradd -p \"\" -u 1000 -g envd -s /bin/sh -m envd"), llb.WithCustomName("create user envd")).
Run(llb.Shlex("adduser envd sudo"), llb.WithCustomName("add user envd to sudoers"))
// Apply llb.User("envd") to the final root state before returning it.
return llb.User("envd")(res.Root())
}

func (g *Graph) compileCUDAPackages() llb.State {
root := llb.Image(
fmt.Sprintf("nvidia/cuda:%s.0-cudnn%s-devel-%s", *g.CUDA, *g.CUDNN, g.OS))
g.BuiltinSystemPackages = append(g.BuiltinSystemPackages, []string{
g.Language,
fmt.Sprintf("%s-pip", g.Language),
}...)
root := llb.Image(fmt.Sprintf("gaocegege/python:3.8-%s-cuda%s-cudnn%s", g.OS, *g.CUDA, *g.CUDNN))
return root
}

Expand All @@ -109,7 +63,7 @@ func (g Graph) compileSystemPackages(root llb.State) llb.State {

// Compose the package install command.
var sb strings.Builder
sb.WriteString("sudo apt-get install -y --no-install-recommends")
sb.WriteString("sudo apt-get update && sudo apt-get install -y --no-install-recommends")

for _, pkg := range g.SystemPackages {
sb.WriteString(fmt.Sprintf(" %s", pkg))
Expand All @@ -118,7 +72,7 @@ func (g Graph) compileSystemPackages(root llb.State) llb.State {
cacheDir := "/var/cache/apt"
cacheLibDir := "/var/lib/apt"

run := root.Run(llb.Shlex(sb.String()),
run := root.Run(llb.Shlex(fmt.Sprintf("bash -c \"%s\"", sb.String())),
llb.WithCustomNamef("(user-defined packages) apt-get install %s",
strings.Join(g.SystemPackages, " ")))
run.AddMount(cacheDir, llb.Scratch(),
Expand All @@ -131,25 +85,27 @@ func (g Graph) compileSystemPackages(root llb.State) llb.State {
func (g *Graph) compileBase() llb.State {
var base llb.State
if g.CUDA == nil && g.CUDNN == nil {
base = llb.Image("docker.io/library/python:3.8")
base = llb.Image("docker.io/gaocegege/python:3.8-ubuntu20.04")
} else {
base = g.compileCUDAPackages()
}
return base
// TODO(gaocegege): Refactor user to a separate stage.
res := base.
Run(llb.Shlex("groupadd -g 1000 envd"), llb.WithCustomName("create user group envd")).
Run(llb.Shlex("useradd -p \"\" -u 1000 -g envd -s /bin/sh -m envd"), llb.WithCustomName("create user envd")).
Run(llb.Shlex("adduser envd sudo"), llb.WithCustomName("add user envd to sudoers"))
return llb.User("envd")(res.Root())
}

func (g Graph) copyEnvdSSHServerWithKey() (llb.State, error) {
func (g Graph) copySSHKey(root llb.State) (llb.State, error) {
// TODO(gaocegege): Remove global var ssh image.
public := DefaultGraph.PublicKeyPath
bdat, err := os.ReadFile(public)
dat := strings.TrimSuffix(string(bdat), "\n")
if err != nil {
return llb.State{}, errors.Wrap(err, "Cannot read public SSH key")
}
run := llb.Image(viper.GetString(flag.FlagSSHImage)).
File(llb.Copy(llb.Image(viper.GetString(flag.FlagSSHImage)),
"usr/bin/envd-ssh", "/var/envd/bin/envd-ssh",
&llb.CopyInfo{CreateDestPath: true}), llb.WithCustomName("install envd-ssh")).
run := root.
File(llb.Mkfile(config.ContainerauthorizedKeysPath,
0644, []byte(dat+" envd"), llb.WithUIDGID(defaultUID, defaultGID)), llb.WithCustomName("install ssh keys"))
return run, nil
Expand Down

0 comments on commit 8e1e498

Please sign in to comment.