From 2cfc5e08696efda6536968b60a6c3365a7580998 Mon Sep 17 00:00:00 2001 From: Max Asnaashari Date: Thu, 25 Jan 2024 09:50:39 +0000 Subject: [PATCH 01/11] microcloud/service: Add tokens endpoint During init, to handle the case where another system is already clustered on a particular service, we need to be able to request this node to issue a token for us from an as-yet untrusted system. This endpoint is untrusted by the cluster, but authenticated with a secret generated during mDNS lookup, so we can use it as a proxy to the unix socket on the remote system, where we will be trusted and can issue a token. Signed-off-by: Max Asnaashari --- microcloud/api/services.go | 43 ++++++++++++++++++++++++++++++ microcloud/api/types/services.go | 6 +++++ microcloud/client/client.go | 14 ++++++++++ microcloud/cmd/microcloudd/main.go | 1 + microcloud/service/microcloud.go | 20 ++++++++++++++ 5 files changed, 84 insertions(+) diff --git a/microcloud/api/services.go b/microcloud/api/services.go index 4a1aaa5d4..08cb59dc7 100644 --- a/microcloud/api/services.go +++ b/microcloud/api/services.go @@ -4,12 +4,14 @@ import ( "encoding/json" "fmt" "net/http" + "net/url" "github.com/canonical/lxd/lxd/response" "github.com/canonical/lxd/lxd/util" "github.com/canonical/lxd/shared/logger" "github.com/canonical/microcluster/rest" "github.com/canonical/microcluster/state" + "github.com/gorilla/mux" "github.com/canonical/microcloud/microcloud/api/types" "github.com/canonical/microcloud/microcloud/service" @@ -55,6 +57,47 @@ var ServicesCmd = func(sh *service.Handler) rest.Endpoint { } } +// ServiceTokensCmd represents the /1.0/services/serviceType/tokens API on MicroCloud. +var ServiceTokensCmd = func(sh *service.Handler) rest.Endpoint { + return rest.Endpoint{ + AllowedBeforeInit: true, + Name: "services/{serviceType}/tokens", + Path: "services/{serviceType}/tokens", + + Post: rest.EndpointAction{Handler: authHandler(sh, serviceTokensPost), AllowUntrusted: true, ProxyTarget: true}, + } +} + +// serviceTokensPost issues a token for service using the MicroCloud proxy. +// Normally a token request to a service would be restricted to trusted systems, +// so this endpoint validates the mDNS auth token and then proxies the request to the local unix socket of the remote system. +func serviceTokensPost(s *state.State, r *http.Request) response.Response { + serviceType, err := url.PathUnescape(mux.Vars(r)["serviceType"]) + if err != nil { + return response.SmartError(err) + } + + // Parse the request. + req := types.ServiceTokensPost{} + + err = json.NewDecoder(r.Body).Decode(&req) + if err != nil { + return response.BadRequest(err) + } + + sh, err := service.NewHandler(s.Name(), req.ClusterAddress, s.OS.StateDir, false, false, types.ServiceType(serviceType)) + if err != nil { + return response.SmartError(err) + } + + token, err := sh.Services[types.ServiceType(serviceType)].IssueToken(s.Context, req.JoinerName) + if err != nil { + return response.SmartError(fmt.Errorf("Failed to issue %s token for peer %q: %w", serviceType, req.JoinerName, err)) + } + + return response.SyncResponse(true, token) +} + // servicesPut updates the cluster status of the MicroCloud peer. func servicesPut(state *state.State, r *http.Request) response.Response { // Parse the request. 
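As an illustration of the wire format this handler expects, the hypothetical standalone client below posts to the new endpoint directly. The path, the X-MicroCloud-Auth header, and the request fields mirror serviceTokensPost and types.ServiceTokensPost above; the address, port, service type, joiner name, and secret are placeholder values only, and the MicroCloud listen port is assumed here.

package main

import (
	"bytes"
	"crypto/tls"
	"encoding/json"
	"fmt"
	"net/http"
)

func main() {
	// Request body matching types.ServiceTokensPost: the clustered system's
	// address and the name of the system that wants a join token.
	body, err := json.Marshal(map[string]string{
		"cluster_address": "10.0.0.2:9443", // hypothetical clustered system
		"joiner_name":     "micro02",       // hypothetical joiner
	})
	if err != nil {
		panic(err)
	}

	req, err := http.NewRequest(http.MethodPost, "https://10.0.0.2:9443/1.0/services/microceph/tokens", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}

	// The secret learned during mDNS lookup authenticates this otherwise untrusted request.
	req.Header.Set("X-MicroCloud-Auth", "placeholder-mdns-secret")

	// The remote listener presents a self-signed certificate at this stage.
	c := &http.Client{Transport: &http.Transport{TLSClientConfig: &tls.Config{InsecureSkipVerify: true}}}

	resp, err := c.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	fmt.Println(resp.Status) // the sync response payload carries the issued join token
}
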
diff --git a/microcloud/api/types/services.go b/microcloud/api/types/services.go index b5251d6cd..2eec63841 100644 --- a/microcloud/api/types/services.go +++ b/microcloud/api/types/services.go @@ -36,3 +36,9 @@ type ServiceToken struct { Service ServiceType `json:"service" yaml:"service"` JoinToken string `json:"join_token" yaml:"join_token"` } + +// ServiceTokensPost represents a request to issue a join token for a MicroCloud service. +type ServiceTokensPost struct { + ClusterAddress string `json:"cluster_address" yaml:"cluster_address"` + JoinerName string `json:"joiner_name" yaml:"joiner_name"` +} diff --git a/microcloud/client/client.go b/microcloud/client/client.go index a186f4762..f8651cc81 100644 --- a/microcloud/client/client.go +++ b/microcloud/client/client.go @@ -23,3 +23,17 @@ func JoinServices(ctx context.Context, c *client.Client, data types.ServicesPut) return nil } + +// RemoteIssueToken issues a token on the remote MicroCloud, trusted by the mDNS auth secret. +func RemoteIssueToken(ctx context.Context, c *client.Client, serviceType types.ServiceType, data types.ServiceTokensPost) (string, error) { + queryCtx, cancel := context.WithTimeout(ctx, 5*time.Minute) + defer cancel() + + var token string + err := c.Query(queryCtx, "POST", api.NewURL().Path("services", string(serviceType), "tokens"), data, &token) + if err != nil { + return "", fmt.Errorf("Failed to issue remote token: %w", err) + } + + return token, nil +} diff --git a/microcloud/cmd/microcloudd/main.go b/microcloud/cmd/microcloudd/main.go index a2fead92f..b0a2b84c9 100644 --- a/microcloud/cmd/microcloudd/main.go +++ b/microcloud/cmd/microcloudd/main.go @@ -131,6 +131,7 @@ func (c *cmdDaemon) Run(cmd *cobra.Command, args []string) error { endpoints := []rest.Endpoint{ api.ServicesCmd(s), + api.ServiceTokensCmd(s), api.LXDProxy(s), api.CephProxy(s), api.OVNProxy(s), diff --git a/microcloud/service/microcloud.go b/microcloud/service/microcloud.go index bc8081e32..f460c6fed 100644 --- a/microcloud/service/microcloud.go +++ b/microcloud/service/microcloud.go @@ -95,6 +95,26 @@ func (s CloudService) IssueToken(ctx context.Context, peer string) (string, erro return s.client.NewJoinToken(peer) } +// RemoteIssueToken issues a token for the given peer on a remote MicroCloud where we are authorized by mDNS. +func (s CloudService) RemoteIssueToken(ctx context.Context, clusterAddress string, secret string, peer string, serviceType types.ServiceType) (string, error) { + c, err := s.client.RemoteClient(util.CanonicalNetworkAddress(clusterAddress, CloudPort)) + if err != nil { + return "", err + } + + c.Client.Client.Transport = &http.Transport{ + TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, + DisableKeepAlives: true, + Proxy: func(r *http.Request) (*url.URL, error) { + r.Header.Set("X-MicroCloud-Auth", secret) + + return shared.ProxyFromEnvironment(r) + }, + } + + return client.RemoteIssueToken(ctx, c, serviceType, types.ServiceTokensPost{ClusterAddress: c.URL().URL.Host, JoinerName: peer}) +} + // Join joins a cluster with the given token. 
func (s CloudService) Join(ctx context.Context, joinConfig JoinConfig) error { return s.client.JoinCluster(s.name, util.CanonicalNetworkAddress(s.address, s.port), joinConfig.Token, nil, 5*time.Minute) From 93e46988aef0228c1ee0d39f6c17f8fca0020bc3 Mon Sep 17 00:00:00 2001 From: Max Asnaashari Date: Thu, 25 Jan 2024 15:48:15 +0000 Subject: [PATCH 02/11] microcloud/service: Allow fetching remote cluster members Signed-off-by: Max Asnaashari --- microcloud/service/interface.go | 2 ++ microcloud/service/lxd.go | 17 +++++++++++++++- microcloud/service/microceph.go | 34 ++++++++++++++++++++++---------- microcloud/service/microcloud.go | 26 ++++++++++++++++++++++++ microcloud/service/microovn.go | 34 ++++++++++++++++++++++---------- 5 files changed, 92 insertions(+), 21 deletions(-) diff --git a/microcloud/service/interface.go b/microcloud/service/interface.go index a2b21dbec..c91c4032c 100644 --- a/microcloud/service/interface.go +++ b/microcloud/service/interface.go @@ -11,7 +11,9 @@ type Service interface { Bootstrap(ctx context.Context) error IssueToken(ctx context.Context, peer string) (string, error) Join(ctx context.Context, config JoinConfig) error + ClusterMembers(ctx context.Context) (map[string]string, error) + RemoteClusterMembers(ctx context.Context, secret string, address string) (map[string]string, error) Type() types.ServiceType Name() string diff --git a/microcloud/service/lxd.go b/microcloud/service/lxd.go index d371635d7..39aff2568 100644 --- a/microcloud/service/lxd.go +++ b/microcloud/service/lxd.go @@ -225,13 +225,28 @@ func (s LXDService) IssueToken(ctx context.Context, peer string) (string, error) return joinToken.String(), nil } -// ClusterMembers returns a map of cluster member names and addresses. +// RemoteClusterMembers returns a map of cluster member names and addresses from the MicroCloud at the given address, authenticated with the given secret. +func (s LXDService) RemoteClusterMembers(ctx context.Context, secret string, address string) (map[string]string, error) { + client, err := s.remoteClient(secret, address, CloudPort) + if err != nil { + return nil, err + } + + return s.clusterMembers(client) +} + +// ClusterMembers returns a map of cluster member names. func (s LXDService) ClusterMembers(ctx context.Context) (map[string]string, error) { client, err := s.Client(ctx, "") if err != nil { return nil, err } + return s.clusterMembers(client) +} + +// clusterMembers returns a map of cluster member names and addresses. +func (s LXDService) clusterMembers(client lxd.InstanceServer) (map[string]string, error) { members, err := client.GetClusterMembers() if err != nil { return nil, err diff --git a/microcloud/service/microceph.go b/microcloud/service/microceph.go index 448bf6747..dfc9dae61 100644 --- a/microcloud/service/microceph.go +++ b/microcloud/service/microceph.go @@ -2,6 +2,7 @@ package service import ( "context" + "crypto/tls" "fmt" "net/http" "net/url" @@ -130,24 +131,37 @@ func (s CephService) Join(ctx context.Context, joinConfig JoinConfig) error { return nil } -// ClusterMembers returns a map of cluster member names and addresses. -func (s CephService) ClusterMembers(ctx context.Context) (map[string]string, error) { - client, err := s.Client("", "") +// RemoteClusterMembers returns a map of cluster member names and addresses from the MicroCloud at the given address, authenticated with the given secret. 
+func (s CephService) RemoteClusterMembers(ctx context.Context, secret string, address string) (map[string]string, error) { + client, err := s.m.RemoteClient(util.CanonicalNetworkAddress(address, CloudPort)) if err != nil { return nil, err } - members, err := client.GetClusterMembers(ctx) - if err != nil { - return nil, err + client.Client.Client.Transport = &http.Transport{ + TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, + DisableKeepAlives: true, + Proxy: func(r *http.Request) (*url.URL, error) { + r.Header.Set("X-MicroCloud-Auth", secret) + if !strings.HasPrefix(r.URL.Path, "/1.0/services/microceph") { + r.URL.Path = "/1.0/services/microceph" + r.URL.Path + } + + return shared.ProxyFromEnvironment(r) + }, } - genericMembers := make(map[string]string, len(members)) - for _, member := range members { - genericMembers[member.Name] = member.Address.String() + return clusterMembers(ctx, client) +} + +// ClusterMembers returns a map of cluster member names and addresses. +func (s CephService) ClusterMembers(ctx context.Context) (map[string]string, error) { + client, err := s.Client("", "") + if err != nil { + return nil, err } - return genericMembers, nil + return clusterMembers(ctx, client) } // Type returns the type of Service. diff --git a/microcloud/service/microcloud.go b/microcloud/service/microcloud.go index f460c6fed..e6f7413cd 100644 --- a/microcloud/service/microcloud.go +++ b/microcloud/service/microcloud.go @@ -13,6 +13,7 @@ import ( "github.com/canonical/lxd/shared" "github.com/canonical/lxd/shared/api" cephTypes "github.com/canonical/microceph/microceph/api/types" + microClient "github.com/canonical/microcluster/client" "github.com/canonical/microcluster/config" "github.com/canonical/microcluster/microcluster" "github.com/canonical/microcluster/rest" @@ -143,6 +144,26 @@ func (s CloudService) RequestJoin(ctx context.Context, secret string, name strin return client.JoinServices(ctx, c, joinConfig) } +// RemoteClusterMembers returns a map of cluster member names and addresses from the MicroCloud at the given address, authenticated with the given secret. +func (s CloudService) RemoteClusterMembers(ctx context.Context, secret string, address string) (map[string]string, error) { + client, err := s.client.RemoteClient(util.CanonicalNetworkAddress(address, CloudPort)) + if err != nil { + return nil, err + } + + client.Client.Client.Transport = &http.Transport{ + TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, + DisableKeepAlives: true, + Proxy: func(r *http.Request) (*url.URL, error) { + r.Header.Set("X-MicroCloud-Auth", secret) + + return shared.ProxyFromEnvironment(r) + }, + } + + return clusterMembers(ctx, client) +} + // ClusterMembers returns a map of cluster member names and addresses. func (s CloudService) ClusterMembers(ctx context.Context) (map[string]string, error) { client, err := s.client.LocalClient() @@ -150,6 +171,11 @@ func (s CloudService) ClusterMembers(ctx context.Context) (map[string]string, er return nil, err } + return clusterMembers(ctx, client) +} + +// clusterMembers returns a map of cluster member names and addresses. 
+func clusterMembers(ctx context.Context, client *microClient.Client) (map[string]string, error) { members, err := client.GetClusterMembers(ctx) if err != nil { return nil, err diff --git a/microcloud/service/microovn.go b/microcloud/service/microovn.go index 26993d0e5..c9cbbd7a6 100644 --- a/microcloud/service/microovn.go +++ b/microcloud/service/microovn.go @@ -2,6 +2,7 @@ package service import ( "context" + "crypto/tls" "fmt" "net/http" "net/url" @@ -91,24 +92,37 @@ func (s OVNService) Join(ctx context.Context, joinConfig JoinConfig) error { return s.m.JoinCluster(s.name, util.CanonicalNetworkAddress(s.address, s.port), joinConfig.Token, nil, 5*time.Minute) } -// ClusterMembers returns a map of cluster member names and addresses. -func (s OVNService) ClusterMembers(ctx context.Context) (map[string]string, error) { - client, err := s.Client() +// RemoteClusterMembers returns a map of cluster member names and addresses from the MicroCloud at the given address, authenticated with the given secret. +func (s OVNService) RemoteClusterMembers(ctx context.Context, secret string, address string) (map[string]string, error) { + client, err := s.m.RemoteClient(util.CanonicalNetworkAddress(address, CloudPort)) if err != nil { return nil, err } - members, err := client.GetClusterMembers(ctx) - if err != nil { - return nil, err + client.Client.Client.Transport = &http.Transport{ + TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, + DisableKeepAlives: true, + Proxy: func(r *http.Request) (*url.URL, error) { + r.Header.Set("X-MicroCloud-Auth", secret) + if !strings.HasPrefix(r.URL.Path, "/1.0/services/microovn") { + r.URL.Path = "/1.0/services/microovn" + r.URL.Path + } + + return shared.ProxyFromEnvironment(r) + }, } - genericMembers := make(map[string]string, len(members)) - for _, member := range members { - genericMembers[member.Name] = member.Address.String() + return clusterMembers(ctx, client) +} + +// ClusterMembers returns a map of cluster member names and addresses. +func (s OVNService) ClusterMembers(ctx context.Context) (map[string]string, error) { + client, err := s.Client() + if err != nil { + return nil, err } - return genericMembers, nil + return clusterMembers(ctx, client) } // Type returns the type of Service. From 62c8192f3795697a249f3f87e930b59cca5328a1 Mon Sep 17 00:00:00 2001 From: Max Asnaashari Date: Thu, 25 Jan 2024 10:02:24 +0000 Subject: [PATCH 03/11] microcloud/cmd/microcloud: Add checkClustered helper Signed-off-by: Max Asnaashari --- microcloud/cmd/microcloud/ask.go | 42 +++++++++++++ microcloud/cmd/microcloud/main_init.go | 84 ++++++++++++++++++++++++++ 2 files changed, 126 insertions(+) diff --git a/microcloud/cmd/microcloud/ask.go b/microcloud/cmd/microcloud/ask.go index ac78cad98..4e296969d 100644 --- a/microcloud/cmd/microcloud/ask.go +++ b/microcloud/cmd/microcloud/ask.go @@ -7,6 +7,7 @@ import ( "sort" "strings" + "github.com/canonical/lxd/shared" "github.com/canonical/lxd/shared/api" cli "github.com/canonical/lxd/shared/cmd" "github.com/canonical/lxd/shared/logger" @@ -822,3 +823,44 @@ func (c *CmdControl) askNetwork(sh *service.Handler, systems map[string]InitSyst return nil } + +// askClustered checks whether any of the selected systems have already initialized any expected services. +// If a service is already initialized on some systems, we will offer to add the remaining systems, or skip that service. +// If multiple systems have separately initialized the same service, we will abort initialization. 
+// Preseed YAML files have a flag that sets whether to reuse the cluster.
+// In auto setup, we will expect no initialized services so that we can be opinionated about how we configure the cluster without user input.
+func (c *CmdControl) askClustered(s *service.Handler, autoSetup bool, systems map[string]InitSystem) error {
+	expectedServices := make(map[types.ServiceType]service.Service, len(s.Services))
+	for k, v := range s.Services {
+		expectedServices[k] = v
+	}
+
+	for serviceType := range expectedServices {
+		initializedSystem, _, err := checkClustered(s, autoSetup, serviceType, systems)
+		if err != nil {
+			return err
+		}
+
+		if initializedSystem != "" {
+			question := fmt.Sprintf("%q is already part of a %s cluster. Do you want to add this cluster to MicroCloud? (add/skip) [default=add]", initializedSystem, serviceType)
+			validator := func(s string) error {
+				if !shared.ValueInSlice[string](s, []string{"add", "skip"}) {
+					return fmt.Errorf("Invalid input, expected one of (add,skip) but got %q", s)
+				}
+
+				return nil
+			}
+
+			addOrSkip, err := c.asker.AskString(question, "add", validator)
+			if err != nil {
+				return err
+			}
+
+			if addOrSkip != "add" {
+				delete(s.Services, serviceType)
+			}
+		}
+	}
+
+	return nil
+}
diff --git a/microcloud/cmd/microcloud/main_init.go b/microcloud/cmd/microcloud/main_init.go
index 99b0d71e7..5ceb98d8e 100644
--- a/microcloud/cmd/microcloud/main_init.go
+++ b/microcloud/cmd/microcloud/main_init.go
@@ -30,6 +30,8 @@ import (
 type InitSystem struct {
 	ServerInfo mdns.ServerInfo // Data reported by mDNS about this system.
 
+	InitializedServices map[types.ServiceType]map[string]string // A map of services and their cluster members, if initialized.
+
 	AvailableDisks []lxdAPI.ResourcesStorageDisk // Disks as reported by LXD.
 
 	MicroCephDisks []cephTypes.DisksPost // Disks intended to be passed to MicroCeph.
@@ -132,6 +134,11 @@ func (c *cmdInit) RunInteractive(cmd *cobra.Command, args []string) error {
 		return err
 	}
 
+	err = c.common.askClustered(s, c.flagAutoSetup, systems)
+	if err != nil {
+		return err
+	}
+
 	err = c.common.askDisks(s, systems, c.flagAutoSetup, c.flagWipeAllDisks)
 	if err != nil {
 		return err
@@ -532,6 +539,83 @@ func validateSystems(s *service.Handler, systems map[string]InitSystem) (err err
 	return nil
 }
 
+// checkClustered checks whether any of the selected systems have already initialized a service.
+// Returns the first system we find that is initialized for the given service, along with all of that system's existing cluster members.
+func checkClustered(s *service.Handler, autoSetup bool, serviceType types.ServiceType, systems map[string]InitSystem) (firstInitializedSystem string, existingMembers map[string]string, err error) {
+	// LXD should always be uninitialized at this point, so we can just return default values that consider LXD uninitialized.
+	if serviceType == types.LXD {
+		return "", nil, nil
+	}
+
+	for peer, system := range systems {
+		var remoteClusterMembers map[string]string
+		var err error
+
+		// If the peer in question is ourselves, we can just use the unix socket.
+ if peer == s.Name { + remoteClusterMembers, err = s.Services[serviceType].ClusterMembers(context.Background()) + } else { + remoteClusterMembers, err = s.Services[serviceType].RemoteClusterMembers(context.Background(), system.ServerInfo.AuthSecret, system.ServerInfo.Address) + } + + if err != nil && err.Error() != "Daemon not yet initialized" { + return "", nil, fmt.Errorf("Failed to reach %s on system %q: %w", serviceType, peer, err) + } + + // If we failed to retrieve cluster members due to the system not being initialized, we can ignore it. + if err != nil { + continue + } + + clusterMembers := map[string]string{} + for k, v := range remoteClusterMembers { + host, _, err := net.SplitHostPort(v) + if err != nil { + return "", nil, err + } + + clusterMembers[k] = host + } + + if autoSetup { + return "", nil, fmt.Errorf("System %q is already clustered on %s", peer, serviceType) + } + + // If this is the first clustered system we found, then record its cluster members. + if firstInitializedSystem == "" { + // Record that this system has initialized the service. + existingMembers = clusterMembers + if system.InitializedServices == nil { + system.InitializedServices = map[types.ServiceType]map[string]string{} + } + + system.InitializedServices[serviceType] = clusterMembers + systems[peer] = system + firstInitializedSystem = peer + + if clusterMembers[peer] != systems[peer].ServerInfo.Address && clusterMembers[peer] != "" { + return "", nil, fmt.Errorf("%s is already set up on %q on a different network", serviceType, peer) + } + + continue + } + + // If we've already encountered a clustered system, check if there's a mismatch in cluster members. + for k, v := range existingMembers { + if clusterMembers[k] != v { + return "", nil, fmt.Errorf("%q and %q are already part of different %s clusters. Aborting initialization", firstInitializedSystem, peer, serviceType) + } + } + + // Ensure the maps are identical. + if len(clusterMembers) != len(existingMembers) { + return "", nil, fmt.Errorf("Some systems are already part of different %s clusters. Aborting initialization", serviceType) + } + } + + return firstInitializedSystem, existingMembers, nil +} + // setupCluster Bootstraps the cluster if necessary, adds all peers to the cluster, and completes any post cluster // configuration. func setupCluster(s *service.Handler, systems map[string]InitSystem) error { From 00026aded14e731a24f744efe01a86a596db5cbf Mon Sep 17 00:00:00 2001 From: Max Asnaashari Date: Thu, 18 Apr 2024 00:07:34 +0000 Subject: [PATCH 04/11] microcloud/cmd/microcloud: Consider existing clusters when forming the cluster Signed-off-by: Max Asnaashari --- microcloud/cmd/microcloud/main_init.go | 73 ++++++++++++++++++++------ 1 file changed, 57 insertions(+), 16 deletions(-) diff --git a/microcloud/cmd/microcloud/main_init.go b/microcloud/cmd/microcloud/main_init.go index 5ceb98d8e..66b72f9b4 100644 --- a/microcloud/cmd/microcloud/main_init.go +++ b/microcloud/cmd/microcloud/main_init.go @@ -619,15 +619,41 @@ func checkClustered(s *service.Handler, autoSetup bool, serviceType types.Servic // setupCluster Bootstraps the cluster if necessary, adds all peers to the cluster, and completes any post cluster // configuration. 
 func setupCluster(s *service.Handler, systems map[string]InitSystem) error {
+	initializedServices := map[types.ServiceType]string{}
 	_, bootstrap := systems[s.Name]
 	if bootstrap {
+		for serviceType := range s.Services {
+			for peer, system := range systems {
+				if system.InitializedServices[serviceType] != nil {
+					initializedServices[serviceType] = peer
+					break
+				}
+			}
+		}
+
 		fmt.Println("Initializing a new cluster")
+		mu := sync.Mutex{}
 		err := s.RunConcurrent(true, false, func(s service.Service) error {
+			// If there's already an initialized system for this service, we don't need to bootstrap it.
+			if initializedServices[s.Type()] != "" {
+				return nil
+			}
+
 			err := s.Bootstrap(context.Background())
 			if err != nil {
 				return fmt.Errorf("Failed to bootstrap local %s: %w", s.Type(), err)
 			}
 
+			mu.Lock()
+			clustered := systems[s.Name()]
+			if clustered.InitializedServices == nil {
+				clustered.InitializedServices = map[types.ServiceType]map[string]string{}
+			}
+
+			clustered.InitializedServices[s.Type()] = map[string]string{s.Name(): s.Address()}
+			systems[s.Name()] = clustered
+			mu.Unlock()
+
 			fmt.Printf(" Local %s is ready\n", s.Type())
 
 			return nil
@@ -635,30 +661,45 @@ func setupCluster(s *service.Handler, systems map[string]InitSystem) error {
 		if err != nil {
 			return err
 		}
+	}
+
+	err := AddPeers(s, systems)
+	if err != nil {
+		return err
+	}
+
+	if bootstrap {
+		// Joiners will add their disks as part of the join process, so only add disks here for the system we bootstrapped, or for systems that already existed in the cluster.
+		peer := s.Name
+		microCeph := initializedServices[types.MicroCeph]
+		if microCeph != "" {
+			peer = microCeph
+		}
+
+		for name := range systems[peer].InitializedServices[types.MicroCeph] {
+			// There may be existing cluster members that are not a part of MicroCloud, so ignore those.
+			if systems[name].ServerInfo.Name == "" {
+				continue
+			}
 
-		// Only add disks for the local MicroCeph as other systems will add their disks upon joining.
- var c *client.Client - for _, disk := range systems[s.Name].MicroCephDisks { - if c == nil { - c, err = s.Services[types.MicroCeph].(*service.CephService).Client("", "") + var c *client.Client + for _, disk := range systems[name].MicroCephDisks { + if c == nil { + c, err = s.Services[types.MicroCeph].(*service.CephService).Client(name, systems[name].ServerInfo.AuthSecret) + if err != nil { + return err + } + } + + logger.Debug("Adding disk to MicroCeph", logger.Ctx{"name": name, "disk": disk.Path}) + _, err = cephClient.AddDisk(context.Background(), c, &disk) if err != nil { return err } } - - logger.Debug("Adding disk to MicroCeph", logger.Ctx{"peer": s.Name, "disk": disk.Path}) - _, err = cephClient.AddDisk(context.Background(), c, &disk) - if err != nil { - return err - } } } - err := AddPeers(s, systems) - if err != nil { - return err - } - fmt.Println("Configuring cluster-wide devices ...") var ovnConfig string From b6d4c86f36c52a51f19f83840a5fa628630a6c2e Mon Sep 17 00:00:00 2001 From: Max Asnaashari Date: Thu, 18 Apr 2024 00:30:33 +0000 Subject: [PATCH 05/11] microcloud/cmd/microcloud: Move token issuing to AddPeers Detecting already clustered members will make this block very complex, so separate it out from `waitForJoin` so that the scope of the helper is just to wait for nodes to join Signed-off-by: Max Asnaashari --- microcloud/cmd/microcloud/main_init.go | 42 ++++++++++++++------------ 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/microcloud/cmd/microcloud/main_init.go b/microcloud/cmd/microcloud/main_init.go index 66b72f9b4..6951a9199 100644 --- a/microcloud/cmd/microcloud/main_init.go +++ b/microcloud/cmd/microcloud/main_init.go @@ -330,29 +330,12 @@ func lookupPeers(s *service.Handler, autoSetup bool, iface *net.Interface, subne return nil } -// waitForJoin issues a token and instructs a system to request a join, +// waitForJoin requests a system to join each service's respective cluster, // and then waits for the request to either complete or time out. // If the request was successful, it additionally waits until the cluster appears in the database. func waitForJoin(sh *service.Handler, clusterSize int, secret string, peer string, cfg types.ServicesPut) error { - mut := sync.Mutex{} - err := sh.RunConcurrent(false, false, func(s service.Service) error { - token, err := s.IssueToken(context.Background(), peer) - if err != nil { - return fmt.Errorf("Failed to issue %s token for peer %q: %w", s.Type(), peer, err) - } - - mut.Lock() - cfg.Tokens = append(cfg.Tokens, types.ServiceToken{Service: s.Type(), JoinToken: token}) - mut.Unlock() - - return nil - }) - if err != nil { - return err - } - cloud := sh.Services[types.MicroCloud].(*service.CloudService) - err = cloud.RequestJoin(context.Background(), secret, peer, cfg) + err := cloud.RequestJoin(context.Background(), secret, peer, cfg) if err != nil { return fmt.Errorf("System %q failed to join the cluster: %w", peer, err) } @@ -407,6 +390,27 @@ func AddPeers(sh *service.Handler, systems map[string]InitSystem) error { if err != nil { return fmt.Errorf("Failed to inspect existing cluster: %w", err) } + // Concurrently issue a token for each joiner. 
+	for peer := range systems {
+		mut := sync.Mutex{}
+		err := sh.RunConcurrent(false, false, func(s service.Service) error {
+			token, err := s.IssueToken(context.Background(), peer)
+			if err != nil {
+				return fmt.Errorf("Failed to issue %s token for peer %q: %w", s.Type(), peer, err)
+			}
+
+			mut.Lock()
+			cfg := joinConfig[peer]
+			cfg.Tokens = append(cfg.Tokens, types.ServiceToken{Service: s.Type(), JoinToken: token})
+			joinConfig[peer] = cfg
+			mut.Unlock()
+
+			return nil
+		})
+		if err != nil {
+			return err
+		}
+	}
 
 	clusterSize := len(cluster)

From 3e187dfe0e831eaf146cb641a5aa695d36d7b474 Mon Sep 17 00:00:00 2001
From: Max Asnaashari
Date: Thu, 18 Apr 2024 00:32:23 +0000
Subject: [PATCH 06/11] microcloud/cmd/microcloud: Issue tokens on a remote
 system if it's clustered

The cluster size delta per service will become uneven if some nodes are
already clustered, and we can't guarantee that the local node isn't
already participating in some of those clusters, so we need to handle
each service more explicitly by carrying a map around the cluster join
process.

Signed-off-by: Max Asnaashari
---
 microcloud/cmd/microcloud/main_init.go | 118 +++++++++++++++++++------
 1 file changed, 90 insertions(+), 28 deletions(-)

diff --git a/microcloud/cmd/microcloud/main_init.go b/microcloud/cmd/microcloud/main_init.go
index 6951a9199..323381a30 100644
--- a/microcloud/cmd/microcloud/main_init.go
+++ b/microcloud/cmd/microcloud/main_init.go
@@ -333,7 +333,7 @@ func lookupPeers(s *service.Handler, autoSetup bool, iface *net.Interface, subne
 // waitForJoin requests a system to join each service's respective cluster,
 // and then waits for the request to either complete or time out.
 // If the request was successful, it additionally waits until the cluster appears in the database.
-func waitForJoin(sh *service.Handler, clusterSize int, secret string, peer string, cfg types.ServicesPut) error {
+func waitForJoin(sh *service.Handler, clusterSizes map[types.ServiceType]int, secret string, peer string, cfg types.ServicesPut) error {
 	cloud := sh.Services[types.MicroCloud].(*service.CloudService)
 	err := cloud.RequestJoin(context.Background(), secret, peer, cfg)
 	if err != nil {
@@ -341,24 +341,30 @@ func waitForJoin(sh *service.Handler, clusterSize int, secret string, peer strin
 	}
 
 	clustered := make(map[types.ServiceType]bool, len(sh.Services))
-	for service := range sh.Services {
-		clustered[service] = false
+	for _, tokenInfo := range cfg.Tokens {
+		clustered[tokenInfo.Service] = false
 	}
 
+	// Iterate over all services until the database is updated with the new node across all of them.
 	now := time.Now()
 	for len(clustered) != 0 {
 		if time.Since(now) >= time.Second*30 {
 			return fmt.Errorf("Timed out waiting for cluster member %q to appear", peer)
 		}
 
+		// Check the size of the cluster for each service.
 		for service := range clustered {
 			systems, err := sh.Services[service].ClusterMembers(context.Background())
 			if err != nil {
 				return err
 			}
 
-			if len(systems) == clusterSize+1 {
+			// If the size of the cluster has been incremented by 1 from its initial value,
+			// then we don't need to check the corresponding service anymore.
+			// So remove the service from consideration and update the current cluster size for the next node.
+ if len(systems) == clusterSizes[service]+1 { delete(clustered, service) + clusterSizes[service] = clusterSizes[service] + 1 } } } @@ -369,61 +375,117 @@ func waitForJoin(sh *service.Handler, clusterSize int, secret string, peer strin } func AddPeers(sh *service.Handler, systems map[string]InitSystem) error { - joinConfig := make(map[string]types.ServicesPut, len(systems)) - secrets := make(map[string]string, len(systems)) - for peer, info := range systems { - if peer == sh.Name { - continue + // Grab the systems that are clustered from the InitSystem map. + initializedServices := map[types.ServiceType]string{} + existingSystems := map[types.ServiceType]map[string]string{} + for serviceType := range sh.Services { + for peer, system := range systems { + if system.InitializedServices != nil && system.InitializedServices[serviceType] != nil { + initializedServices[serviceType] = peer + existingSystems[serviceType] = system.InitializedServices[serviceType] + break + } } + } + // Prepare a JoinConfig to send to each joiner. + joinConfig := make(map[string]types.ServicesPut, len(systems)) + for peer, info := range systems { joinConfig[peer] = types.ServicesPut{ Tokens: []types.ServiceToken{}, Address: info.ServerInfo.Address, LXDConfig: info.JoinConfig, CephConfig: info.MicroCephDisks, } - - secrets[peer] = info.ServerInfo.AuthSecret } - cluster, err := sh.Services[types.MicroCloud].ClusterMembers(context.Background()) - if err != nil { - return fmt.Errorf("Failed to inspect existing cluster: %w", err) + _, bootstrap := systems[sh.Name] + clusterSize := map[types.ServiceType]int{} + if bootstrap { + for serviceType, clusterMembers := range existingSystems { + clusterSize[serviceType] = len(clusterMembers) + } } + // Concurrently issue a token for each joiner. for peer := range systems { mut := sync.Mutex{} err := sh.RunConcurrent(false, false, func(s service.Service) error { - token, err = s.IssueToken(context.Background(), peer) - if err != nil { - return fmt.Errorf("Failed to issue %s token for peer %q: %w", s.Type(), peer, err) - } + // Only issue a token if the system isn't already part of that cluster. + if existingSystems[s.Type()][peer] == "" { + clusteredSystem := systems[initializedServices[s.Type()]] + + var token string + var err error - mut.Lock() - cfg := joinConfig[peer] - cfg.Tokens = append(cfg.Tokens, types.ServiceToken{Service: s.Type(), JoinToken: token}) - joinConfig[peer] = cfg - mut.Unlock() + // If the local node is part of the pre-existing cluster, or if we are growing the cluster, issue the token locally. + // Otherwise, use the MicroCloud proxy to ask an existing cluster member to issue the token. + if clusteredSystem.ServerInfo.Name == sh.Name || clusteredSystem.ServerInfo.Name == "" { + token, err = s.IssueToken(context.Background(), peer) + if err != nil { + return fmt.Errorf("Failed to issue %s token for peer %q: %w", s.Type(), peer, err) + } + } else { + cloud := sh.Services[types.MicroCloud].(*service.CloudService) + token, err = cloud.RemoteIssueToken(context.Background(), clusteredSystem.ServerInfo.Address, clusteredSystem.ServerInfo.AuthSecret, peer, s.Type()) + if err != nil { + return err + } + } + + // Fetch the current cluster sizes if we are adding a new node. 
+ var currentCluster map[string]string + if !bootstrap { + currentCluster, err = s.ClusterMembers(context.Background()) + if err != nil { + return fmt.Errorf("Failed to check for existing %s cluster size: %w", s.Type(), err) + } + } + + mut.Lock() + + if !bootstrap { + clusterSize[s.Type()] = len(currentCluster) + } + + cfg := joinConfig[peer] + cfg.Tokens = append(cfg.Tokens, types.ServiceToken{Service: s.Type(), JoinToken: token}) + joinConfig[peer] = cfg + mut.Unlock() + } return nil }) if err != nil { return err } - } - - clusterSize := len(cluster) + } fmt.Println("Awaiting cluster formation ...") + + // If the local node needs to join an existing cluster, do it first so we can proceed as normal. + if len(joinConfig[sh.Name].Tokens) > 0 { + cfg := joinConfig[sh.Name] + err := waitForJoin(sh, clusterSize, "", sh.Name, cfg) + if err != nil { + return err + } + + // Sleep 3 seconds to give the cluster roles time to reshuffle before adding more members. + time.Sleep(3 * time.Second) + } + for peer, cfg := range joinConfig { + if len(cfg.Tokens) == 0 || peer == sh.Name { + continue + } + logger.Debug("Initiating sequential request for cluster join", logger.Ctx{"peer": peer}) err := waitForJoin(sh, clusterSize, systems[peer].ServerInfo.AuthSecret, peer, cfg) if err != nil { return err } - clusterSize = clusterSize + 1 - // Sleep 3 seconds to give the cluster roles time to reshuffle before adding more members. time.Sleep(3 * time.Second) } From 056ad1da5e48f8116fb571f082985688e9506f4e Mon Sep 17 00:00:00 2001 From: Max Asnaashari Date: Thu, 25 Jan 2024 15:52:52 +0000 Subject: [PATCH 07/11] microcloud/service: Allow requesting the local node to join a cluster Signed-off-by: Max Asnaashari --- microcloud/service/microcloud.go | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/microcloud/service/microcloud.go b/microcloud/service/microcloud.go index e6f7413cd..56592e202 100644 --- a/microcloud/service/microcloud.go +++ b/microcloud/service/microcloud.go @@ -126,19 +126,28 @@ func (s CloudService) RequestJoin(ctx context.Context, secret string, name strin ctx, cancel := context.WithTimeout(ctx, time.Minute*5) defer cancel() - c, err := s.client.RemoteClient(util.CanonicalNetworkAddress(joinConfig.Address, CloudPort)) - if err != nil { - return err - } + var c *microClient.Client + var err error + if name == s.name { + c, err = s.client.LocalClient() + if err != nil { + return err + } + } else { + c, err = s.client.RemoteClient(util.CanonicalNetworkAddress(joinConfig.Address, CloudPort)) + if err != nil { + return err + } - c.Client.Client.Transport = &http.Transport{ - TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, - DisableKeepAlives: true, - Proxy: func(r *http.Request) (*url.URL, error) { - r.Header.Set("X-MicroCloud-Auth", secret) + c.Client.Client.Transport = &http.Transport{ + TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, + DisableKeepAlives: true, + Proxy: func(r *http.Request) (*url.URL, error) { + r.Header.Set("X-MicroCloud-Auth", secret) - return shared.ProxyFromEnvironment(r) - }, + return shared.ProxyFromEnvironment(r) + }, + } } return client.JoinServices(ctx, c, joinConfig) From 961c2161cdbf4744dbb1e94379e8b00bcc44bbd2 Mon Sep 17 00:00:00 2001 From: Max Asnaashari Date: Fri, 26 Jan 2024 03:44:52 +0000 Subject: [PATCH 08/11] microcloud/cmd/microcloud: Add ReuseExistingClusters key to Preseed Signed-off-by: Max Asnaashari --- .../cmd/microcloud/main_init_preseed.go | 33 ++++++++++++++++--- 
microcloud/cmd/microcloud/preseed_test.go | 12 +++++++ 2 files changed, 40 insertions(+), 5 deletions(-) diff --git a/microcloud/cmd/microcloud/main_init_preseed.go b/microcloud/cmd/microcloud/main_init_preseed.go index ab90a40e7..ea63877f6 100644 --- a/microcloud/cmd/microcloud/main_init_preseed.go +++ b/microcloud/cmd/microcloud/main_init_preseed.go @@ -24,11 +24,12 @@ import ( // Preseed represents the structure of the supported preseed yaml. type Preseed struct { - LookupSubnet string `yaml:"lookup_subnet"` - LookupInterface string `yaml:"lookup_interface"` - Systems []System `yaml:"systems"` - OVN InitNetwork `yaml:"ovn"` - Storage StorageFilter `yaml:"storage"` + LookupSubnet string `yaml:"lookup_subnet"` + LookupInterface string `yaml:"lookup_interface"` + ReuseExistingClusters bool `yaml:"reuse_existing_clusters"` + Systems []System `yaml:"systems"` + OVN InitNetwork `yaml:"ovn"` + Storage StorageFilter `yaml:"storage"` } // System represents the structure of the systems we expect to find in the preseed yaml. @@ -204,6 +205,10 @@ func (p *Preseed) validate(name string, bootstrap bool) error { } } + if !bootstrap && p.ReuseExistingClusters { + return fmt.Errorf("Additional cluster members cannot be part of a pre-existing cluster") + } + if bootstrap && !localInit { return fmt.Errorf("Local MicroCloud must be included in the list of systems when initializing") } @@ -396,6 +401,24 @@ func (p *Preseed) Parse(s *service.Handler, bootstrap bool) (map[string]InitSyst return nil, err } + expectedServices := make(map[types.ServiceType]service.Service, len(s.Services)) + for k, v := range s.Services { + expectedServices[k] = v + } + + for serviceType := range expectedServices { + initializedSystem, _, err := checkClustered(s, false, serviceType, systems) + if err != nil { + return nil, err + } + + if initializedSystem != "" && !p.ReuseExistingClusters { + fmt.Printf("Existing %s cluster on system %q is incompatible with MicroCloud, skipping %s setup\n", serviceType, initializedSystem, serviceType) + + delete(s.Services, serviceType) + } + } + for name, system := range systems { system.MicroCephDisks = []cephTypes.DisksPost{} system.TargetStoragePools = []lxdAPI.StoragePoolsPost{} diff --git a/microcloud/cmd/microcloud/preseed_test.go b/microcloud/cmd/microcloud/preseed_test.go index 0cd5ca400..aedf5736c 100644 --- a/microcloud/cmd/microcloud/preseed_test.go +++ b/microcloud/cmd/microcloud/preseed_test.go @@ -316,3 +316,15 @@ func (s *preseedSuite) Test_preseedMatchDisksMemory() { s.Equal(len(results), 1) s.Equal(results[0], disks[0]) } + +// Tests that ReuseExistingClusters only works when initializing, not when growing the cluster. 
+func (s *preseedSuite) Test_restrictClusterReuse() {
+	p := Preseed{ReuseExistingClusters: true, LookupSubnet: "10.0.0.1/24", LookupInterface: "enp5s0", Systems: []System{{Name: "B"}, {Name: "C"}}}
+
+	s.NoError(p.validate("B", true))
+
+	s.Error(p.validate("A", false))
+
+	p.ReuseExistingClusters = false
+	s.NoError(p.validate("A", false))
+}

From 59e4aacdbcd52c1a2f06ecb12b22918c6e17d65f Mon Sep 17 00:00:00 2001
From: Max Asnaashari
Date: Wed, 24 Apr 2024 19:11:15 +0000
Subject: [PATCH 09/11] doc/how-to: Add reuse and skip documentation

Signed-off-by: Max Asnaashari
---
 doc/how-to/initialise.rst | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/doc/how-to/initialise.rst b/doc/how-to/initialise.rst
index 77608549b..5ceea16fa 100644
--- a/doc/how-to/initialise.rst
+++ b/doc/how-to/initialise.rst
@@ -90,6 +90,35 @@ Complete the following steps to initialise MicroCloud:
 See an example of the full initialisation process in the :ref:`Get started with MicroCloud ` tutorial.
 
+Excluding MicroCeph or MicroOVN from MicroCloud
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+If the MicroOVN or MicroCeph snap is not installed on the system that runs :command:`microcloud init`, you will be prompted with the following question::
+
+    MicroCeph not found. Continue anyway? (yes/no) [default=yes]:
+
+    MicroOVN not found. Continue anyway? (yes/no) [default=yes]:
+
+If you choose ``yes``, only existing services will be configured on all systems.
+If you choose ``no``, the setup will be cancelled.
+
+All other systems must have at least the same set of snaps installed as the system that runs :command:`microcloud init`, otherwise they will not be available to select from the list of systems.
+Any questions associated with these systems will be skipped. For example, if MicroCeph is not installed, you will not be prompted for distributed storage configuration.
+
+Reusing an existing MicroCeph or MicroOVN with MicroCloud
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+If some of the systems are already part of a MicroCeph or MicroOVN cluster, you can choose to reuse this cluster when initialising MicroCloud. You will be prompted with the following question::
+
+    "micro01" is already part of a MicroCeph cluster. Do you want to add this cluster to MicroCloud? (add/skip) [default=add]:
+
+    "micro01" is already part of a MicroOVN cluster. Do you want to add this cluster to MicroCloud? (add/skip) [default=add]:
+
+If you choose ``add``, MicroCloud will add the remaining systems selected for initialisation to the pre-existing cluster.
+If you choose ``skip``, the respective service will not be set up at all.
+
+If more than one MicroCeph or MicroOVN cluster exists among the systems, the MicroCloud initialisation will be cancelled.
+
 .. _howto-initialise-preseed:
 
 Non-interactive configuration

From 5636937af59c34d97cec2893a40781eba9c9e36f Mon Sep 17 00:00:00 2001
From: Max Asnaashari
Date: Fri, 26 Jan 2024 03:46:07 +0000
Subject: [PATCH 10/11] microcloud/test/includes: Add reuse-cluster input to
 interactive test helper

Signed-off-by: Max Asnaashari
---
 microcloud/test/includes/microcloud.sh | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/microcloud/test/includes/microcloud.sh b/microcloud/test/includes/microcloud.sh
index 69753776e..c8a113d76 100644
--- a/microcloud/test/includes/microcloud.sh
+++ b/microcloud/test/includes/microcloud.sh
@@ -2,7 +2,7 @@
 # unset_interactive_vars: Unsets all variables related to the test console.
unset_interactive_vars() { - unset LOOKUP_IFACE LIMIT_SUBNET SKIP_SERVICE EXPECT_PEERS \ + unset LOOKUP_IFACE LIMIT_SUBNET SKIP_SERVICE EXPECT_PEERS REUSE_EXISTING REUSE_EXISTING_COUNT \ SETUP_ZFS ZFS_FILTER ZFS_WIPE \ SETUP_CEPH CEPH_WARNING CEPH_FILTER CEPH_WIPE SETUP_CEPHFS \ SETUP_OVN OVN_WARNING OVN_FILTER IPV4_SUBNET IPV4_START IPV4_END DNS_ADDRESSES IPV6_SUBNET @@ -46,6 +46,16 @@ select-all # select all the sys $(true) # workaround for set -e " +if [ -n "${REUSE_EXISTING}" ]; then + for i in $(seq 1 "${REUSE_EXISTING_COUNT}") ; do + setup=$(cat << EOF +${setup} +${REUSE_EXISTING} +EOF +) + done +fi + if [ -n "${SETUP_ZFS}" ]; then setup="${setup} ${SETUP_ZFS} # add local disks (yes/no) From 3424df479bcc956b862660c55d200bc21342437f Mon Sep 17 00:00:00 2001 From: Max Asnaashari Date: Fri, 26 Jan 2024 03:48:19 +0000 Subject: [PATCH 11/11] microcloud/test/suites: Add reuse-cluster tests Signed-off-by: Max Asnaashari --- microcloud/test/includes/microcloud.sh | 42 ++++---- microcloud/test/main.sh | 1 + microcloud/test/suites/basic.sh | 133 ++++++++++++++++++++++++- 3 files changed, 155 insertions(+), 21 deletions(-) diff --git a/microcloud/test/includes/microcloud.sh b/microcloud/test/includes/microcloud.sh index c8a113d76..8c1f53bae 100644 --- a/microcloud/test/includes/microcloud.sh +++ b/microcloud/test/includes/microcloud.sh @@ -13,26 +13,28 @@ unset_interactive_vars() { # The lines that are output are based on the values passed to the listed environment variables. # Any unset variables will be omitted. microcloud_interactive() { - LOOKUP_IFACE=${LOOKUP_IFACE:-} # filter string for the lookup interface table. - LIMIT_SUBNET=${LIMIT_SUBNET:-} # (yes/no) input for limiting lookup of systems to the above subnet. - SKIP_SERVICE=${SKIP_SERVICE:-} # (yes/no) input to skip any missing services. Should be unset if all services are installed. - EXPECT_PEERS=${EXPECT_PEERS:-} # wait for this number of systems to be available to join the cluster. - SETUP_ZFS=${SETUP_ZFS:-} # (yes/no) input for initiating ZFS storage pool setup. - ZFS_FILTER=${ZFS_FILTER:-} # filter string for ZFS disks. - ZFS_WIPE=${ZFS_WIPE:-} # (yes/no) to wipe all disks. - SETUP_CEPH=${SETUP_CEPH:-} # (yes/no) input for initiating CEPH storage pool setup. - SETUP_CEPHFS=${SETUP_CEPHFS:-} # (yes/no) input for initialising CephFS storage pool setup. - CEPH_WARNING=${CEPH_WARNING:-} # (yes/no) input for warning about eligible disk detection. - CEPH_FILTER=${CEPH_FILTER:-} # filter string for CEPH disks. - CEPH_WIPE=${CEPH_WIPE:-} # (yes/no) to wipe all disks. - SETUP_OVN=${SETUP_OVN:-} # (yes/no) input for initiating OVN network setup. - OVN_WARNING=${OVN_WARNING:-} # (yes/no) input for warning about eligible interface detection. - OVN_FILTER=${OVN_FILTER:-} # filter string for OVN interfaces. - IPV4_SUBNET=${IPV4_SUBNET:-} # OVN ipv4 gateway subnet. - IPV4_START=${IPV4_START:-} # OVN ipv4 range start. - IPV4_END=${IPV4_END:-} # OVN ipv4 range end. - DNS_ADDRESSES=${DNS_ADDRESSES:-} # OVN custom DNS addresses. - IPV6_SUBNET=${IPV6_SUBNET:-} # OVN ipv6 range. + LOOKUP_IFACE=${LOOKUP_IFACE:-} # filter string for the lookup interface table. + LIMIT_SUBNET=${LIMIT_SUBNET:-} # (yes/no) input for limiting lookup of systems to the above subnet. + SKIP_SERVICE=${SKIP_SERVICE:-} # (yes/no) input to skip any missing services. Should be unset if all services are installed. + EXPECT_PEERS=${EXPECT_PEERS:-} # wait for this number of systems to be available to join the cluster. 
+	REUSE_EXISTING=${REUSE_EXISTING:-}              # (add/skip) response for an existing clustered service.
+	REUSE_EXISTING_COUNT=${REUSE_EXISTING_COUNT:-0} # (number) number of existing clusters to incorporate.
+	SETUP_ZFS=${SETUP_ZFS:-}                        # (yes/no) input for initiating ZFS storage pool setup.
+	ZFS_FILTER=${ZFS_FILTER:-}                      # filter string for ZFS disks.
+	ZFS_WIPE=${ZFS_WIPE:-}                          # (yes/no) to wipe all disks.
+	SETUP_CEPH=${SETUP_CEPH:-}                      # (yes/no) input for initiating CEPH storage pool setup.
+	SETUP_CEPHFS=${SETUP_CEPHFS:-}                  # (yes/no) input for initialising CephFS storage pool setup.
+	CEPH_WARNING=${CEPH_WARNING:-}                  # (yes/no) input for warning about eligible disk detection.
+	CEPH_FILTER=${CEPH_FILTER:-}                    # filter string for CEPH disks.
+	CEPH_WIPE=${CEPH_WIPE:-}                        # (yes/no) to wipe all disks.
+	SETUP_OVN=${SETUP_OVN:-}                        # (yes/no) input for initiating OVN network setup.
+	OVN_WARNING=${OVN_WARNING:-}                    # (yes/no) input for warning about eligible interface detection.
+	OVN_FILTER=${OVN_FILTER:-}                      # filter string for OVN interfaces.
+	IPV4_SUBNET=${IPV4_SUBNET:-}                    # OVN ipv4 gateway subnet.
+	IPV4_START=${IPV4_START:-}                      # OVN ipv4 range start.
+	IPV4_END=${IPV4_END:-}                          # OVN ipv4 range end.
+	DNS_ADDRESSES=${DNS_ADDRESSES:-}                # OVN custom DNS addresses.
+	IPV6_SUBNET=${IPV6_SUBNET:-}                    # OVN ipv6 range.
 
 	setup="
 	${LOOKUP_IFACE}                                         # filter the lookup interface
diff --git a/microcloud/test/main.sh b/microcloud/test/main.sh
index a3d9abffa..396694ea2 100755
--- a/microcloud/test/main.sh
+++ b/microcloud/test/main.sh
@@ -188,6 +188,7 @@ run_basic_tests() {
 	run_test test_instances_launch "instances launch"
 	run_test test_service_mismatch "service mismatch"
 	run_test test_disk_mismatch "disk mismatch"
+	run_test test_reuse_cluster "reuse cluster"
 }
 
 run_interactive_tests() {
diff --git a/microcloud/test/suites/basic.sh b/microcloud/test/suites/basic.sh
index e3f960bbb..155af598e 100644
--- a/microcloud/test/suites/basic.sh
+++ b/microcloud/test/suites/basic.sh
@@ -79,7 +79,7 @@ test_interactive() {
 	export IPV4_START="10.1.123.100"
 	export IPV4_END="10.1.123.254"
 	export IPV6_SUBNET="fd42:1:1234:1234::1/64"
-	export DNS_ADDRESSES="10.1.123.1,8.8.8.8"
+	export DNS_ADDRESSES="10.1.123.1,8.8.8.8"
 	microcloud_interactive | lxc exec micro01 -- sh -c "microcloud init > out"
 
 	lxc exec micro01 -- tail -1 out | grep "MicroCloud is ready" -q
@@ -822,3 +822,134 @@ test_auto() {
 		! lxc exec ${m} -- lxc network ls -f csv | grep -q "^UPLINK," || false
 	done
 }
+
+# services_validator: A basic validator of 3 systems with typical expected inputs.
+services_validator() {
+	for m in micro01 micro02 micro03 ; do
+		validate_system_lxd ${m} 3 disk1 1 1 enp6s0 10.1.123.1/24 10.1.123.100-10.1.123.254 fd42:1:1234:1234::1/64 10.1.123.1,8.8.8.8
+		validate_system_microceph ${m} 1 disk2
+		validate_system_microovn ${m}
+	done
+}
+
+test_reuse_cluster() {
+	unset_interactive_vars
+
+	# Set the default config for interactive setup.
+	export LOOKUP_IFACE="enp5s0"
+	export LIMIT_SUBNET="yes"
+	export EXPECT_PEERS=2
+	export SETUP_ZFS="yes"
+	export ZFS_FILTER="lxd_disk1"
+	export ZFS_WIPE="yes"
+	export SETUP_CEPH="yes"
+	export SETUP_CEPHFS="yes"
+	export CEPH_FILTER="lxd_disk2"
+	export CEPH_WIPE="yes"
+	export SETUP_OVN="yes"
+	export OVN_FILTER="enp6s0"
+	export IPV4_SUBNET="10.1.123.1/24"
+	export IPV4_START="10.1.123.100"
+	export IPV4_END="10.1.123.254"
+	export DNS_ADDRESSES="10.1.123.1,8.8.8.8"
+	export IPV6_SUBNET="fd42:1:1234:1234::1/64"
+
+	reset_systems 3 3 3
+	echo "Create a MicroCloud that re-uses an existing service"
+	export REUSE_EXISTING_COUNT=1
+	export REUSE_EXISTING="add"
+	lxc exec micro02 -- microceph cluster bootstrap
+	microcloud_interactive | lxc exec micro01 -- sh -c "microcloud init > out"
+	services_validator
+
+	reset_systems 3 3 3
+	echo "Create a MicroCloud that re-uses an existing service on the local node"
+	lxc exec micro01 -- microceph cluster bootstrap
+	microcloud_interactive | lxc exec micro01 -- sh -c "microcloud init > out"
+	services_validator
+
+	reset_systems 3 3 3
+	echo "Create a MicroCloud that re-uses an existing MicroCeph and MicroOVN"
+	export REUSE_EXISTING_COUNT=2
+	export REUSE_EXISTING="add"
+	lxc exec micro02 -- microceph cluster bootstrap
+	lxc exec micro02 -- microovn cluster bootstrap
+	microcloud_interactive | lxc exec micro01 -- sh -c "microcloud init > out"
+	services_validator
+
+	reset_systems 3 3 3
+	echo "Create a MicroCloud that re-uses an existing MicroCeph and MicroOVN on different nodes"
+	lxc exec micro02 -- microceph cluster bootstrap
+	lxc exec micro03 -- microovn cluster bootstrap
+	microcloud_interactive | lxc exec micro01 -- sh -c "microcloud init > out"
+	services_validator
+
+	reset_systems 3 3 3
+	echo "Create a MicroCloud that re-uses an existing service with multiple nodes from this cluster"
+	export REUSE_EXISTING_COUNT=1
+	export REUSE_EXISTING="add"
+	lxc exec micro02 -- microceph cluster bootstrap
+	token="$(lxc exec micro02 -- microceph cluster add micro01)"
+	lxc exec micro01 -- microceph cluster join "${token}"
+	microcloud_interactive | lxc exec micro01 -- sh -c "microcloud init > out"
+	services_validator
+
+	reset_systems 3 3 3
+	echo "Create a MicroCloud that re-uses an existing service with all nodes from this cluster"
+	lxc exec micro02 -- microceph cluster bootstrap
+	token="$(lxc exec micro02 -- microceph cluster add micro01)"
+	lxc exec micro01 -- microceph cluster join "${token}"
+	token="$(lxc exec micro02 -- microceph cluster add micro03)"
+	lxc exec micro03 -- microceph cluster join "${token}"
+	microcloud_interactive | lxc exec micro01 -- sh -c "microcloud init > out"
+	services_validator
+
+	reset_systems 4 3 3
+	echo "Create a MicroCloud that re-uses an existing service with foreign cluster members"
+	lxc exec micro04 -- snap disable microcloud
+	lxc exec micro02 -- microceph cluster bootstrap
+	token="$(lxc exec micro02 -- microceph cluster add micro04)"
+	lxc exec micro04 -- microceph cluster join "${token}"
+	microcloud_interactive | lxc exec micro01 -- sh -c "microcloud init > out"
+	services_validator
+	validate_system_microceph micro04 1
+
+	reset_systems 3 3 3
+	echo "Fail to create a MicroCloud due to an existing service if --auto specified"
+	lxc exec micro02 -- microceph cluster bootstrap
+	! lxc exec micro01 -- sh -c "TEST_CONSOLE=0 microcloud init --auto > out" || false
+
+	echo "Fail to create a MicroCloud due to conflicting existing services"
+	lxc exec micro03 -- microceph cluster bootstrap
+	! microcloud_interactive | lxc exec micro01 -- sh -c "microcloud init > out" || false
+
+	reset_systems 3 3 3
+	echo "Create a MicroCloud that re-uses an existing service with preseed"
+	addr=$(lxc ls micro01 -f csv -c4 | grep enp5s0 | cut -d' ' -f1)
+	lxc exec micro01 --env TEST_CONSOLE=0 -- microcloud init --preseed << EOF
+lookup_subnet: ${addr}/24
+lookup_interface: enp5s0
+reuse_existing_clusters: true
+systems:
+- name: micro01
+- name: micro02
+- name: micro03
+ovn:
+  ipv4_gateway: 10.1.123.1/24
+  ipv4_range: 10.1.123.100-10.1.123.254
+  ipv6_gateway: fd42:1:1234:1234::1/64
+  dns_servers: 10.1.123.1,8.8.8.8
+storage:
+  local:
+  - find: id == sdb
+    wipe: true
+  ceph:
+  - find: id == sdc
+    wipe: true
+  cephfs: true
+EOF
+
+	services_validator
+}
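As a closing illustration of the join tracking introduced in patch 06, the standalone sketch below distils how each pending service is polled until its membership grows by exactly one, then dropped from consideration with its recorded size bumped for the next joiner. The types and signatures here are hypothetical simplifications; the real logic lives in waitForJoin in main_init.go and operates on service.Handler and types.ServiceType.

package main

import (
	"fmt"
	"time"
)

// waitForJoin polls each pending service until its cluster has grown by one
// member, accommodating uneven per-service starting sizes.
func waitForJoin(poll func(service string) int, clusterSizes map[string]int, pending map[string]bool, timeout time.Duration) error {
	deadline := time.Now().Add(timeout)
	for len(pending) != 0 {
		if time.Now().After(deadline) {
			return fmt.Errorf("timed out waiting for join")
		}

		for service := range pending {
			// Once the membership count is incremented by 1 from its initial
			// value, stop checking this service and record the new size.
			if poll(service) == clusterSizes[service]+1 {
				delete(pending, service)
				clusterSizes[service]++
			}
		}
	}

	return nil
}

func main() {
	// Hypothetical starting state: MicroCeph already had 3 members, MicroOVN 1.
	sizes := map[string]int{"MicroCeph": 3, "MicroOVN": 1}
	pending := map[string]bool{"MicroCeph": true, "MicroOVN": true}

	// Simulated poller: pretend both clusters already grew by one member.
	poll := func(service string) int { return map[string]int{"MicroCeph": 4, "MicroOVN": 2}[service] }

	fmt.Println(waitForJoin(poll, sizes, pending, 30*time.Second)) // <nil>
	fmt.Println(sizes)                                             // map[MicroCeph:4 MicroOVN:2]
}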