diff --git a/CHANGELOG b/CHANGELOG index decf4dee..180d76aa 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -4,6 +4,32 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [1.0.0-rc.1] - 2020-10-22 +### Change +* Added `GUBER_DATA_CENTER` as a config option +* Use `GUBER_PEER_DISCOVERY_TYPE` to pick a peer discovery type, and removed the +'Enable' options from k8s, etcd, and member-list. +* Added `GUBER_ADVERTISE_ADDRESS` to specify which address is published for +discovery +* Gubernator now attempts to detect the proper `GUBER_ADVERTISE_ADDRESS` if +not specified +* Gubernator now binds to `localhost` by default instead of binding to +`0.0.0.0:80` to avoid allowing + access to a test version of gubernator from the network. +* Fix inconsistent tests failing #57 +* Fix GRPC/HTTP Gateway #50 +* Renamed functions to ensure clarity of version +* Removed deprecated `EtcdAdvertiseAddress` config option +* Refactored configuration options +* `member-list` metadata no longer assumes the member-list address is the same + as the gubernator advertise address. +* Now MD5 sums the peer address key when using replicated hash. This ensures + better key distribution when using domain names or IP addresses that are very + similar. (gubernator-1, gubernator-2, etc...) +* Now defaults to `replicated-hash` if `GUBER_PEER_PICKER` is unset +* Added support for DataCenter fields when using etcd discovery +* Now storing member-list metadata as JSON instead of glob + ## [0.9.2] - 2020-10-23 ### Change * ETCD discovery now sets the IsOwner property when updating the peers list. diff --git a/Dockerfile b/Dockerfile index 37b5c240..d430f64b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -10,11 +10,12 @@ RUN go mod download # Copy the local package files to the container ADD . /go/src -ENV VERSION=dev-build -# Build the bot inside the container +ARG VERSION + +# Build the server inside the container RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -a -installsuffix cgo \ - -ldflags "-w -s -X main.Version=${VERSION}" -o /gubernator /go/src/cmd/gubernator/main.go /go/src/cmd/gubernator/config.go + -ldflags "-w -s -X main.Version=$VERSION" -o /gubernator /go/src/cmd/gubernator/main.go # Create our deploy image FROM scratch diff --git a/Makefile b/Makefile index 6bb32266..fadf6133 100644 --- a/Makefile +++ b/Makefile @@ -6,15 +6,15 @@ VERSION=$(shell cat version) LDFLAGS="-X main.Version=$(VERSION)" test: - go test ./... -v -race -count=1 + go test ./... -v -race -p=1 -count=1 docker: docker build --build-arg VERSION=$(VERSION) -t thrawn01/gubernator:$(VERSION) . docker tag thrawn01/gubernator:$(VERSION) thrawn01/gubernator:latest release: - GOOS=darwin GOARCH=amd64 go build -ldflags $(LDFLAGS) -o gubernator.darwin ./cmd/gubernator/main.go ./cmd/gubernator/config.go - GOOS=linux GOARCH=amd64 go build -ldflags $(LDFLAGS) -o gubernator.linux ./cmd/gubernator/main.go ./cmd/gubernator/config.go + GOOS=darwin GOARCH=amd64 go build -ldflags $(LDFLAGS) -o gubernator.darwin ./cmd/gubernator/main.go + GOOS=linux GOARCH=amd64 go build -ldflags $(LDFLAGS) -o gubernator.linux ./cmd/gubernator/main.go proto: scripts/proto.sh diff --git a/algorithms.go b/algorithms.go index 2f5db84d..f1a07064 100644 --- a/algorithms.go +++ b/algorithms.go @@ -17,7 +17,7 @@ limitations under the License.
package gubernator import ( - "time" + "github.com/mailgun/holster/v3/clock" ) // Implements token bucket algorithm for rate limiting. https://en.wikipedia.org/wiki/Token_bucket @@ -88,7 +88,7 @@ func tokenBucket(s Store, c Cache, r *RateLimitReq) (resp *RateLimitResp, err er if t.Duration != r.Duration { expire := t.CreatedAt + r.Duration if HasBehavior(r.Behavior, Behavior_DURATION_IS_GREGORIAN) { - expire, err = GregorianExpiration(time.Now(), r.Duration) + expire, err = GregorianExpiration(clock.Now(), r.Duration) if err != nil { return nil, err } @@ -138,7 +138,7 @@ func tokenBucket(s Store, c Cache, r *RateLimitReq) (resp *RateLimitResp, err er now := MillisecondNow() expire := now + r.Duration if HasBehavior(r.Behavior, Behavior_DURATION_IS_GREGORIAN) { - expire, err = GregorianExpiration(time.Now(), r.Duration) + expire, err = GregorianExpiration(clock.Now(), r.Duration) if err != nil { return nil, err } @@ -214,11 +214,11 @@ func leakyBucket(s Store, c Cache, r *RateLimitReq) (resp *RateLimitResp, err er duration := r.Duration rate := float64(duration) / float64(r.Limit) if HasBehavior(r.Behavior, Behavior_DURATION_IS_GREGORIAN) { - d, err := GregorianDuration(time.Now(), r.Duration) + d, err := GregorianDuration(clock.Now(), r.Duration) if err != nil { return nil, err } - n := time.Now() + n := clock.Now() expire, err := GregorianExpiration(n, r.Duration) if err != nil { return nil, err @@ -291,7 +291,7 @@ func leakyBucket(s Store, c Cache, r *RateLimitReq) (resp *RateLimitResp, err er duration := r.Duration if HasBehavior(r.Behavior, Behavior_DURATION_IS_GREGORIAN) { - n := time.Now() + n := clock.Now() expire, err := GregorianExpiration(n, r.Duration) if err != nil { return nil, err diff --git a/benchmark_test.go b/benchmark_test.go index ac45c96f..4968275a 100644 --- a/benchmark_test.go +++ b/benchmark_test.go @@ -51,7 +51,7 @@ func BenchmarkServer_GetPeerRateLimitNoBatching(b *testing.B) { } func BenchmarkServer_GetRateLimit(b *testing.B) { - client, err := guber.DialV1Server(cluster.GetRandomPeer().Address) + client, err := guber.DialV1Server(cluster.GetRandomPeer().GRPCAddress) if err != nil { b.Errorf("NewV1Client err: %s", err) } @@ -77,7 +77,7 @@ func BenchmarkServer_GetRateLimit(b *testing.B) { } func BenchmarkServer_Ping(b *testing.B) { - client, err := guber.DialV1Server(cluster.GetRandomPeer().Address) + client, err := guber.DialV1Server(cluster.GetRandomPeer().GRPCAddress) if err != nil { b.Errorf("NewV1Client err: %s", err) } @@ -105,7 +105,7 @@ func BenchmarkServer_Ping(b *testing.B) { }*/ func BenchmarkServer_ThunderingHeard(b *testing.B) { - client, err := guber.DialV1Server(cluster.GetRandomPeer().Address) + client, err := guber.DialV1Server(cluster.GetRandomPeer().GRPCAddress) if err != nil { b.Errorf("NewV1Client err: %s", err) } diff --git a/cache.go b/cache.go index 33e4ccad..d22455ae 100644 --- a/cache.go +++ b/cache.go @@ -21,8 +21,8 @@ package gubernator import ( "container/list" "sync" - "time" + "github.com/mailgun/holster/v3/clock" "github.com/mailgun/holster/v3/setter" "github.com/prometheus/client_golang/prometheus" ) @@ -131,7 +131,7 @@ func (c *LRUCache) Add(record *CacheItem) bool { // Return unix epoch in milliseconds func MillisecondNow() int64 { - return time.Now().UnixNano() / 1000000 + return clock.Now().UnixNano() / 1000000 } // GetItem returns the item stored in the cache diff --git a/client.go b/client.go index fbe6aac7..e325e369 100644 --- a/client.go +++ b/client.go @@ -18,8 +18,8 @@ package gubernator import ( "math/rand" - "time" + 
"github.com/mailgun/holster/v3/clock" "github.com/pkg/errors" "google.golang.org/grpc" ) @@ -48,23 +48,26 @@ func DialV1Server(server string) (V1Client, error) { return NewV1Client(conn), nil } -// Convert a time.Duration to a unix millisecond timestamp -func ToTimeStamp(duration time.Duration) int64 { - return int64(duration / time.Millisecond) +// Convert a clock.Duration to a unix millisecond timestamp +func ToTimeStamp(duration clock.Duration) int64 { + return int64(duration / clock.Millisecond) } // Convert a unix millisecond timestamp to a time.Duration -func FromTimeStamp(ts int64) time.Duration { - return time.Now().Sub(FromUnixMilliseconds(ts)) +func FromTimeStamp(ts int64) clock.Duration { + return clock.Now().Sub(FromUnixMilliseconds(ts)) } -func FromUnixMilliseconds(ts int64) time.Time { - return time.Unix(0, ts*int64(time.Millisecond)) +func FromUnixMilliseconds(ts int64) clock.Time { + return clock.Unix(0, ts*int64(clock.Millisecond)) } // Given a list of peers, return a random peer func RandomPeer(peers []PeerInfo) PeerInfo { - return peers[rand.Intn(len(peers))] + rand.Shuffle(len(peers), func(i, j int) { + peers[i], peers[j] = peers[j], peers[i] + }) + return peers[0] } // Return a random alpha string of 'n' length diff --git a/cluster/cluster.go b/cluster/cluster.go index 03e855ea..1fedc168 100644 --- a/cluster/cluster.go +++ b/cluster/cluster.go @@ -17,159 +17,102 @@ limitations under the License. package cluster import ( - "fmt" - "net" - "time" + "context" + "math/rand" "github.com/mailgun/gubernator" + "github.com/mailgun/holster/v3/clock" "github.com/pkg/errors" "github.com/sirupsen/logrus" - "google.golang.org/grpc" ) -type instance struct { - GRPC *grpc.Server - Guber *gubernator.Instance - Address string -} - -func (i *instance) Peers() []gubernator.PeerInfo { - var result []gubernator.PeerInfo - for _, peer := range peers { - if peer.Address == i.Address { - peer.IsOwner = true - } - result = append(result, peer) - } - return result -} - -func (i *instance) Stop() error { - err := i.Guber.Close() - i.GRPC.GracefulStop() - return err -} - -var instances []*instance +var daemons []*gubernator.Daemon var peers []gubernator.PeerInfo -// Returns default testing configuration -func GetDefaultConfig() gubernator.Config { - return gubernator.Config{ - Behaviors: gubernator.BehaviorConfig{ - GlobalSyncWait: time.Millisecond * 50, // Suitable for testing but not production - GlobalTimeout: time.Second, - MultiRegionSyncWait: time.Millisecond * 50, // Suitable for testing but not production - MultiRegionTimeout: time.Second, - }, - } -} - // Returns a random peer from the cluster func GetRandomPeer() gubernator.PeerInfo { - return gubernator.RandomPeer(peers) + return peers[rand.Intn(len(peers))] } -// Returns a specific peer -func PeerAt(idx int) gubernator.PeerInfo { - return peers[idx] +// Returns a list of all peers in the cluster +func GetPeers() []gubernator.PeerInfo { + return peers } -// Returns a specific instance -func InstanceAt(idx int) *instance { - return instances[idx] +// Returns a list of all deamons in the cluster +func GetDaemons() []*gubernator.Daemon { + return daemons } -// Return the specific instance for a host -func InstanceForHost(host string) *instance { - for i := range instances { - if instances[i].Address == host { - return instances[i] - } - } - return nil +// Returns a specific peer +func PeerAt(idx int) gubernator.PeerInfo { + return peers[idx] } -// Stop an instance without updating peers, used to cause connection errors -func 
StopInstanceAt(idx int) { - instances[idx].Stop() +// Returns a specific daemon +func DaemonAt(idx int) *gubernator.Daemon { + return daemons[idx] } // Returns the number of instances -func NumOfInstances() int { - return len(instances) +func NumOfDaemons() int { + return len(daemons) } // Start a local cluster of gubernator servers func Start(numInstances int) error { - addresses := make([]string, numInstances, numInstances) - return StartWith(addresses) + peers := make([]gubernator.PeerInfo, numInstances, numInstances) + return StartWith(peers) +} + +func Restart(ctx context.Context) { + for i := 0; i < len(daemons); i++ { + daemons[i].Close() + daemons[i].Start(ctx) + daemons[i].SetPeers(peers) + } } // Start a local cluster with specific addresses -func StartWith(addresses []string) error { - config := GetDefaultConfig() - for _, address := range addresses { - ins, err := StartInstance(address, config) +func StartWith(localPeers []gubernator.PeerInfo) error { + for _, peer := range localPeers { + ctx, cancel := context.WithTimeout(context.Background(), clock.Second*10) + d, err := gubernator.SpawnDaemon(ctx, gubernator.DaemonConfig{ + Logger: logrus.WithField("instance", peer.GRPCAddress), + GRPCListenAddress: peer.GRPCAddress, + HTTPListenAddress: peer.HTTPAddress, + Behaviors: gubernator.BehaviorConfig{ + // Suitable for testing but not production + GlobalSyncWait: clock.Millisecond * 50, + GlobalTimeout: clock.Second * 5, + BatchTimeout: clock.Second * 5, + MultiRegionTimeout: clock.Second * 5, + }, + }) + cancel() if err != nil { - return errors.Wrapf(err, "while starting instance for addr '%s'", address) + return errors.Wrapf(err, "while starting server for addr '%s'", peer.GRPCAddress) } - // Add the peers and instances to the package level variables - peers = append(peers, gubernator.PeerInfo{Address: ins.Address}) - instances = append(instances, ins) + // Add the peers and daemons to the package level variables + peers = append(peers, gubernator.PeerInfo{ + GRPCAddress: d.GRPCListener.Addr().String(), + HTTPAddress: d.HTTPListener.Addr().String(), + }) + daemons = append(daemons, d) } // Tell each instance about the other peers - for _, ins := range instances { - ins.Guber.SetPeers(ins.Peers()) + for _, d := range daemons { + d.SetPeers(peers) } return nil } func Stop() { - for _, ins := range instances { - ins.Stop() - } -} - -// Start a single instance of gubernator with the provided config and listening address. -// If address is empty string a random port on the loopback device will be chosen. 
-func StartInstance(address string, conf gubernator.Config) (*instance, error) { - conf.GRPCServer = grpc.NewServer() - - guber, err := gubernator.New(conf) - if err != nil { - return nil, errors.Wrap(err, "while creating new gubernator instance") - } - - listener, err := net.Listen("tcp", address) - if err != nil { - return nil, errors.Wrap(err, "while listening on random interface") + for _, d := range daemons { + d.Close() } - - go func() { - logrus.Infof("Listening on %s", listener.Addr().String()) - if err := conf.GRPCServer.Serve(listener); err != nil { - fmt.Printf("while serving: %s\n", err) - } - }() - - // Wait until the instance responds to connect - for i := 0; i < 10; i++ { - conn, err := net.Dial("tcp", address) - if err != nil { - break - } - conn.Close() - time.Sleep(time.Millisecond * 50) - } - - guber.SetPeers([]gubernator.PeerInfo{{Address: listener.Addr().String(), IsOwner: true}}) - - return &instance{ - Address: listener.Addr().String(), - GRPC: conf.GRPCServer, - Guber: guber, - }, nil + peers = nil + daemons = nil } diff --git a/cluster/cluster_test.go b/cluster/cluster_test.go index 9658cfef..e44a6e9f 100644 --- a/cluster/cluster_test.go +++ b/cluster/cluster_test.go @@ -14,204 +14,60 @@ See the License for the specific language governing permissions and limitations under the License. */ -package cluster +package cluster_test import ( "testing" "github.com/mailgun/gubernator" + "github.com/mailgun/gubernator/cluster" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) -func Test_instance_Peers(t *testing.T) { - tests := []struct { - name string - instance *instance - peers []gubernator.PeerInfo - want []gubernator.PeerInfo - }{ - { - name: "Happy path", - instance: &instance{Address: "mailgun.com"}, - peers: []gubernator.PeerInfo{{Address: "mailgun.com"}}, - want: []gubernator.PeerInfo{ - {Address: "mailgun.com", IsOwner: true}, - }, - }, - { - name: "Get multy peers", - instance: &instance{Address: "mailgun.com"}, - peers: []gubernator.PeerInfo{{Address: "localhost:11111"}, {Address: "mailgun.com"}}, - want: []gubernator.PeerInfo{ - {Address: "localhost:11111"}, - {Address: "mailgun.com", IsOwner: true}, - }, - }, - { - name: "No Peers", - instance: &instance{Address: "www.mailgun.com:11111"}, - peers: []gubernator.PeerInfo{}, - want: []gubernator.PeerInfo(nil), - }, - { - name: "Peers are nil", - instance: &instance{Address: "www.mailgun.com:11111"}, - peers: nil, - want: []gubernator.PeerInfo(nil), - }, - { - name: "Owner does not exist", - instance: &instance{Address: "mailgun.com"}, - peers: []gubernator.PeerInfo{{Address: "localhost:11111"}}, - want: []gubernator.PeerInfo{ - {Address: "localhost:11111"}, - }, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - peers = tt.peers - - got := tt.instance.Peers() - - assert.Equal(t, tt.want, got) - }) - } -} - -func TestGetPeer(t *testing.T) { - tests := []struct { - name string - peers []gubernator.PeerInfo - oneOf map[string]bool - }{ - { - name: "Happy path", - peers: []gubernator.PeerInfo{{Address: "mailgun.com"}}, - }, - { - name: "Get one peer from multiple peers", - peers: []gubernator.PeerInfo{{Address: "mailgun.com"}, {Address: "localhost"}, {Address: "test.com"}}, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - peers = tt.peers - got := GetRandomPeer() - - assert.Contains(t, peers, got) - }) - } -} - -func TestPeerAt(t *testing.T) { - peers = []gubernator.PeerInfo{{Address: "mailgun.com"}} - - got := PeerAt(0) - want := 
gubernator.PeerInfo{Address: "mailgun.com"} - - assert.Equal(t, want, got) -} - -func TestInstanceAt(t *testing.T) { - tests := []struct { - name string - instances []*instance - index int - want *instance - }{ - { - name: "Get first instance", - instances: []*instance{ - {Address: "test.com"}, - {Address: "localhost"}, - }, - index: 0, - want: &instance{Address: "test.com"}, - }, - { - name: "Get second instance", - instances: []*instance{ - {Address: "mailgun.com"}, - {Address: "google.com"}, - }, - index: 1, - want: &instance{Address: "google.com"}, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - instances = tt.instances - - got := InstanceAt(tt.index) - - assert.Equal(t, tt.want, got) - }) - } -} - func TestStartMultipleInstances(t *testing.T) { - // to be tests independent we need to reset the global variables - instances = nil - peers = nil - - err := Start(2) - assert.Nil(t, err) + err := cluster.Start(2) + require.NoError(t, err) + defer cluster.Stop() - assert.Equal(t, 2, len(instances)) - assert.Equal(t, 2, len(peers)) + assert.Equal(t, 2, len(cluster.GetPeers())) + assert.Equal(t, 2, len(cluster.GetDaemons())) } -func TestStartZeroInstances(t *testing.T) { - // to be tests independent we need to reset the global variables - instances = nil - peers = nil +func TestStartOneInstance(t *testing.T) { + err := cluster.Start(1) + require.NoError(t, err) + defer cluster.Stop() - err := Start(0) - assert.Nil(t, err) - - assert.Equal(t, 0, len(instances)) - assert.Equal(t, 0, len(peers)) + assert.Equal(t, 1, len(cluster.GetPeers())) + assert.Equal(t, 1, len(cluster.GetDaemons())) } -func TestStartMultipleInstancesWithAddresses(t *testing.T) { - // to be tests independent we need to reset the global variables - instances = nil - peers = nil - - addresses := []string{"localhost:11111", "localhost:22222"} - err := StartWith(addresses) - assert.Nil(t, err) - - wantPeers := []gubernator.PeerInfo{{Address: "127.0.0.1:11111"}, {Address: "127.0.0.1:22222"}} - wantInstances := []*instance{ - {Address: "127.0.0.1:11111"}, - {Address: "127.0.0.1:22222"}, +func TestStartMultipleDaemons(t *testing.T) { + peers := []gubernator.PeerInfo{ + {GRPCAddress: "localhost:1111", HTTPAddress: "localhost:1112"}, + {GRPCAddress: "localhost:2222", HTTPAddress: "localhost:2221"}} + err := cluster.StartWith(peers) + require.NoError(t, err) + defer cluster.Stop() + + wantPeers := []gubernator.PeerInfo{ + {GRPCAddress: "127.0.0.1:1111", HTTPAddress: "127.0.0.1:1112"}, + {GRPCAddress: "127.0.0.1:2222", HTTPAddress: "127.0.0.1:2221"}, } - assert.Equal(t, wantPeers, peers) - assert.Equal(t, 2, len(instances)) - assert.Equal(t, wantInstances[0].Address, instances[0].Address) - assert.Equal(t, wantInstances[1].Address, instances[1].Address) + daemons := cluster.GetDaemons() + assert.Equal(t, wantPeers, cluster.GetPeers()) + assert.Equal(t, 2, len(daemons)) + assert.Equal(t, "127.0.0.1:1111", daemons[0].GRPCListener.Addr().String()) + assert.Equal(t, "127.0.0.1:2222", daemons[1].GRPCListener.Addr().String()) + assert.Equal(t, "127.0.0.1:2222", cluster.DaemonAt(1).GRPCListener.Addr().String()) + assert.Equal(t, "127.0.0.1:2222", cluster.PeerAt(1).GRPCAddress) } -func TestStartWithAddressesFail(t *testing.T) { - // to be tests independent we need to reset the global variables - instances = nil - peers = nil - - addresses := []string{"11111"} - err := StartWith(addresses) +func TestStartWithInvalidPeer(t *testing.T) { + err := cluster.StartWith([]gubernator.PeerInfo{{GRPCAddress: "1111"}}) 
assert.NotNil(t, err) - assert.Nil(t, peers) - assert.Nil(t, instances) -} - -func stringInSlice(a string, list []string) bool { - for _, b := range list { - if b == a { - return true - } - } - return false + assert.Nil(t, cluster.GetPeers()) + assert.Nil(t, cluster.GetDaemons()) } diff --git a/cmd/gubernator-cli/main.go b/cmd/gubernator-cli/main.go index a5fed755..6af16c8e 100644 --- a/cmd/gubernator-cli/main.go +++ b/cmd/gubernator-cli/main.go @@ -21,10 +21,10 @@ import ( "fmt" "math/rand" "os" - "time" "github.com/davecgh/go-spew/spew" guber "github.com/mailgun/gubernator" + "github.com/mailgun/holster/v3/clock" "github.com/mailgun/holster/v3/syncutil" ) @@ -57,7 +57,7 @@ func main() { UniqueKey: guber.RandomString(10), Hits: 1, Limit: randInt(1, 10), - Duration: randInt(int(time.Millisecond*500), int(time.Second*6)), + Duration: randInt(int(clock.Millisecond*500), int(clock.Second*6)), Algorithm: guber.Algorithm_TOKEN_BUCKET, }) } @@ -67,7 +67,7 @@ func main() { for _, rateLimit := range rateLimits { fan.Run(func(obj interface{}) error { r := obj.(*guber.RateLimitReq) - ctx, cancel := context.WithTimeout(context.Background(), time.Millisecond*500) + ctx, cancel := context.WithTimeout(context.Background(), clock.Millisecond*500) // Now hit our cluster with the rate limits resp, err := client.GetRateLimits(ctx, &guber.GetRateLimitsReq{ Requests: []*guber.RateLimitReq{r}, diff --git a/cmd/gubernator-cluster/main.go b/cmd/gubernator-cluster/main.go index 6726eb86..cc02c30e 100644 --- a/cmd/gubernator-cluster/main.go +++ b/cmd/gubernator-cluster/main.go @@ -21,6 +21,7 @@ import ( "os" "os/signal" + "github.com/mailgun/gubernator" "github.com/mailgun/gubernator/cluster" "github.com/sirupsen/logrus" ) @@ -29,19 +30,19 @@ import ( func main() { logrus.SetLevel(logrus.InfoLevel) // Start a local cluster - err := cluster.StartWith([]string{ - "127.0.0.1:9090", - "127.0.0.1:9091", - "127.0.0.1:9092", - "127.0.0.1:9093", - "127.0.0.1:9094", - "127.0.0.1:9095", + err := cluster.StartWith([]gubernator.PeerInfo{ + {GRPCAddress: "127.0.0.1:9990", HTTPAddress: "127.0.0.1:9980"}, + {GRPCAddress: "127.0.0.1:9991", HTTPAddress: "127.0.0.1:9981"}, + {GRPCAddress: "127.0.0.1:9992", HTTPAddress: "127.0.0.1:9982"}, + {GRPCAddress: "127.0.0.1:9993", HTTPAddress: "127.0.0.1:9983"}, + {GRPCAddress: "127.0.0.1:9994", HTTPAddress: "127.0.0.1:9984"}, + {GRPCAddress: "127.0.0.1:9995", HTTPAddress: "127.0.0.1:9985"}, }) if err != nil { - fmt.Println(err) + panic(err) } - fmt.Println("Ready") + fmt.Println("Running.....") // Wait until we get a INT signal then shutdown the cluster c := make(chan os.Signal, 1) diff --git a/cmd/gubernator/config.go b/cmd/gubernator/config.go deleted file mode 100644 index 19674352..00000000 --- a/cmd/gubernator/config.go +++ /dev/null @@ -1,354 +0,0 @@ -/* -Copyright 2018-2019 Mailgun Technologies Inc - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -package main - -import ( - "crypto/tls" - "crypto/x509" - "flag" - "fmt" - "io/ioutil" - "os" - "strconv" - "strings" - "time" - - etcd "github.com/coreos/etcd/clientv3" - "github.com/davecgh/go-spew/spew" - "github.com/mailgun/gubernator" - "github.com/mailgun/holster/v3/setter" - "github.com/pkg/errors" - "github.com/segmentio/fasthash/fnv1" - "github.com/segmentio/fasthash/fnv1a" - "github.com/sirupsen/logrus" - "k8s.io/klog" -) - -var debug = false - -type ServerConfig struct { - GRPCListenAddress string - EtcdAdvertiseAddress string - HTTPListenAddress string - EtcdKeyPrefix string - CacheSize int - DataCenter string - - // Etcd configuration used to find peers - EtcdConf etcd.Config - - // Configure how behaviours behave - Behaviors gubernator.BehaviorConfig - - // K8s configuration used to find peers inside a K8s cluster - K8PoolConf gubernator.K8sPoolConfig - - // Memberlist configuration used to find peers - MemberlistPoolConf gubernator.MemberlistPoolConfig - - // The PeerPicker as selected by `GUBER_PEER_PICKER` - Picker gubernator.PeerPicker -} - -func confFromEnv() (ServerConfig, error) { - var configFile string - var conf ServerConfig - - flags := flag.NewFlagSet("gubernator", flag.ContinueOnError) - flags.StringVar(&configFile, "config", "", "yaml config file") - flags.BoolVar(&debug, "debug", false, "enable debug") - if err := flags.Parse(os.Args[1:]); err != nil { - return conf, err - } - - // in order to prevent logging to /tmp by k8s.io/client-go - // and other kubernetes related dependencies which are using - // klog (https://github.com/kubernetes/klog), we need to - // initialize klog in the way it prints to stderr only. - klog.InitFlags(nil) - flag.Set("logtostderr", "true") - - if debug || os.Getenv("GUBER_DEBUG") != "" { - logrus.SetLevel(logrus.DebugLevel) - logrus.Debug("Debug enabled") - debug = true - } - - if configFile != "" { - log.Infof("Loading env config: %s", configFile) - if err := fromEnvFile(configFile); err != nil { - return conf, err - } - } - - // Main config - setter.SetDefault(&conf.GRPCListenAddress, os.Getenv("GUBER_GRPC_ADDRESS"), "0.0.0.0:81") - setter.SetDefault(&conf.HTTPListenAddress, os.Getenv("GUBER_HTTP_ADDRESS"), "0.0.0.0:80") - setter.SetDefault(&conf.CacheSize, getEnvInteger("GUBER_CACHE_SIZE"), 50000) - setter.SetDefault(&conf.DataCenter, os.Getenv("GUBER_DATA_CENTER"), "") - - // Behaviors - setter.SetDefault(&conf.Behaviors.BatchTimeout, getEnvDuration("GUBER_BATCH_TIMEOUT")) - setter.SetDefault(&conf.Behaviors.BatchLimit, getEnvInteger("GUBER_BATCH_LIMIT")) - setter.SetDefault(&conf.Behaviors.BatchWait, getEnvDuration("GUBER_BATCH_WAIT")) - - setter.SetDefault(&conf.Behaviors.GlobalTimeout, getEnvDuration("GUBER_GLOBAL_TIMEOUT")) - setter.SetDefault(&conf.Behaviors.GlobalBatchLimit, getEnvInteger("GUBER_GLOBAL_BATCH_LIMIT")) - setter.SetDefault(&conf.Behaviors.GlobalSyncWait, getEnvDuration("GUBER_GLOBAL_SYNC_WAIT")) - - setter.SetDefault(&conf.Behaviors.MultiRegionTimeout, getEnvDuration("GUBER_MULTI_REGION_TIMEOUT")) - setter.SetDefault(&conf.Behaviors.MultiRegionBatchLimit, getEnvInteger("GUBER_MULTI_REGION_BATCH_LIMIT")) - setter.SetDefault(&conf.Behaviors.MultiRegionSyncWait, getEnvDuration("GUBER_MULTI_REGION_SYNC_WAIT")) - - // ETCD Config - setter.SetDefault(&conf.EtcdAdvertiseAddress, os.Getenv("GUBER_ETCD_ADVERTISE_ADDRESS"), "127.0.0.1:81") - setter.SetDefault(&conf.EtcdKeyPrefix, os.Getenv("GUBER_ETCD_KEY_PREFIX"), "/gubernator-peers") - setter.SetDefault(&conf.EtcdConf.Endpoints, 
getEnvSlice("GUBER_ETCD_ENDPOINTS"), []string{"localhost:2379"}) - setter.SetDefault(&conf.EtcdConf.DialTimeout, getEnvDuration("GUBER_ETCD_DIAL_TIMEOUT"), time.Second*5) - setter.SetDefault(&conf.EtcdConf.Username, os.Getenv("GUBER_ETCD_USER")) - setter.SetDefault(&conf.EtcdConf.Password, os.Getenv("GUBER_ETCD_PASSWORD")) - - // Memberlist Config - setter.SetDefault(&conf.MemberlistPoolConf.AdvertiseAddress, os.Getenv("GUBER_MEMBERLIST_ADVERTISE_ADDRESS"), "") - setter.SetDefault(&conf.MemberlistPoolConf.AdvertisePort, getEnvInteger("GUBER_MEMBERLIST_ADVERTISE_PORT"), 7946) - setter.SetDefault(&conf.MemberlistPoolConf.KnownNodes, getEnvSlice("GUBER_MEMBERLIST_KNOWN_NODES"), []string{}) - - // Kubernetes Config - setter.SetDefault(&conf.K8PoolConf.Namespace, os.Getenv("GUBER_K8S_NAMESPACE"), "default") - conf.K8PoolConf.PodIP = os.Getenv("GUBER_K8S_POD_IP") - conf.K8PoolConf.PodPort = os.Getenv("GUBER_K8S_POD_PORT") - conf.K8PoolConf.Selector = os.Getenv("GUBER_K8S_ENDPOINTS_SELECTOR") - - // PeerPicker Config - if pp := os.Getenv("GUBER_PEER_PICKER"); pp != "" { - var replicas int - var hash string - - switch pp { - case "consistent-hash": - setter.SetDefault(&hash, os.Getenv("GUBER_PEER_PICKER_HASH"), "fnv1a") - hashFuncs := map[string]gubernator.HashFunc{ - "fnv1a": fnv1a.HashBytes32, - "fnv1": fnv1.HashBytes32, - "crc32": nil, - } - fn, ok := hashFuncs[hash] - if !ok { - return conf, errors.Errorf("'GUBER_PEER_PICKER_HASH=%s' is invalid; choices are [%s]", - hash, validHashKeys(hashFuncs)) - } - conf.Picker = gubernator.NewConsistantHash(fn) - - case "replicated-hash": - setter.SetDefault(&replicas, getEnvInteger("GUBER_REPLICATED_HASH_REPLICAS"), 1) - conf.Picker = gubernator.NewReplicatedConsistantHash(nil, replicas) - setter.SetDefault(&hash, os.Getenv("GUBER_PEER_PICKER_HASH"), "fnv1a") - hashFuncs := map[string]gubernator.HashFunc64{ - "fnv1a": fnv1a.HashBytes64, - "fnv1": fnv1.HashBytes64, - } - fn, ok := hashFuncs[hash] - if !ok { - return conf, errors.Errorf("'GUBER_PEER_PICKER_HASH=%s' is invalid; choices are [%s]", - hash, validHash64Keys(hashFuncs)) - } - conf.Picker = gubernator.NewReplicatedConsistantHash(fn, replicas) - default: - return conf, errors.Errorf("'GUBER_PEER_PICKER=%s' is invalid; choices are ['replicated-hash', 'consistent-hash']", pp) - } - } - - if anyHasPrefix("GUBER_K8S_", os.Environ()) { - logrus.Debug("K8s peer pool config found") - conf.K8PoolConf.Enabled = true - if conf.K8PoolConf.Selector == "" { - return conf, errors.New("when using k8s for peer discovery, you MUST provide a " + - "`GUBER_K8S_ENDPOINTS_SELECTOR` to select the gubernator peers from the endpoints listing") - } - } - - if anyHasPrefix("GUBER_MEMBERLIST_", os.Environ()) { - logrus.Debug("Memberlist pool config found") - conf.MemberlistPoolConf.Enabled = true - if conf.K8PoolConf.Enabled { - return conf, errors.New("refusing to register gubernator peers with both memberlist and k8s;" + - " remove either `GUBER_MEMBERLIST_*` or `GUBER_K8S_*` variables from the environment") - } - - if len(conf.MemberlistPoolConf.KnownNodes) == 0 { - return conf, errors.New("when using memberlist for peer discovery, you MUST provide a " + - "hostname of a known host in the cluster via `GUBER_MEMBERLIST_KNOWN_NODES`") - } - } - - if anyHasPrefix("GUBER_ETCD_", os.Environ()) { - logrus.Debug("ETCD peer pool config found") - if conf.K8PoolConf.Enabled || conf.MemberlistPoolConf.Enabled { - return conf, errors.New("refusing to register gubernator peers with both etcd, memberlist and k8s;" + - " remove all but 
one of `GUBER_MEMBERLIST_*`, `GUBER_ETCD_*` or `GUBER_K8S_*` variables from the environment") - } - } - - // If env contains any TLS configuration - if anyHasPrefix("GUBER_ETCD_TLS_", os.Environ()) { - if err := setupTLS(&conf.EtcdConf); err != nil { - return conf, err - } - } - - if debug { - spew.Dump(conf) - } - - return conf, nil -} - -func setupTLS(conf *etcd.Config) error { - var tlsCertFile, tlsKeyFile, tlsCAFile string - - // set `GUBER_ETCD_TLS_ENABLE` and this line will - // create a TLS config with no config. - setter.SetDefault(&conf.TLS, &tls.Config{}) - - setter.SetDefault(&tlsCertFile, os.Getenv("GUBER_ETCD_TLS_CERT")) - setter.SetDefault(&tlsKeyFile, os.Getenv("GUBER_ETCD_TLS_KEY")) - setter.SetDefault(&tlsCAFile, os.Getenv("GUBER_ETCD_TLS_CA")) - - // If the CA file was provided - if tlsCAFile != "" { - setter.SetDefault(&conf.TLS, &tls.Config{}) - - var certPool *x509.CertPool = nil - if pemBytes, err := ioutil.ReadFile(tlsCAFile); err == nil { - certPool = x509.NewCertPool() - certPool.AppendCertsFromPEM(pemBytes) - } else { - return errors.Wrapf(err, "while loading cert CA file '%s'", tlsCAFile) - } - setter.SetDefault(&conf.TLS.RootCAs, certPool) - conf.TLS.InsecureSkipVerify = false - } - - // If the cert and key files are provided attempt to load them - if tlsCertFile != "" && tlsKeyFile != "" { - tlsCert, err := tls.LoadX509KeyPair(tlsCertFile, tlsKeyFile) - if err != nil { - return errors.Wrapf(err, "while loading cert '%s' and key file '%s'", - tlsCertFile, tlsKeyFile) - } - setter.SetDefault(&conf.TLS.Certificates, []tls.Certificate{tlsCert}) - } - - // If no other TLS config is provided this will force connecting with TLS, - // without cert verification - if os.Getenv("GUBER_ETCD_TLS_SKIP_VERIFY") != "" { - setter.SetDefault(&conf.TLS, &tls.Config{}) - conf.TLS.InsecureSkipVerify = true - } - return nil -} - -func anyHasPrefix(prefix string, items []string) bool { - for _, i := range items { - if strings.HasPrefix(i, prefix) { - return true - } - } - return false -} - -func getEnvInteger(name string) int { - v := os.Getenv(name) - if v == "" { - return 0 - } - i, err := strconv.ParseInt(v, 10, 64) - if err != nil { - log.WithError(err).Errorf("while parsing '%s' as an integer", name) - return 0 - } - return int(i) -} - -func getEnvDuration(name string) time.Duration { - v := os.Getenv(name) - if v == "" { - return 0 - } - d, err := time.ParseDuration(v) - if err != nil { - log.WithError(err).Errorf("while parsing '%s' as a duration", name) - return 0 - } - return d -} - -func getEnvSlice(name string) []string { - v := os.Getenv(name) - if v == "" { - return nil - } - return strings.Split(v, ",") -} - -// Take values from a file in the format `GUBER_CONF_ITEM=my-value` and put them into the environment -// lines that begin with `#` are ignored -func fromEnvFile(configFile string) error { - fd, err := os.Open(configFile) - if err != nil { - return fmt.Errorf("while opening config file: %s", err) - } - - contents, err := ioutil.ReadAll(fd) - if err != nil { - return fmt.Errorf("while reading config file '%s': %s", configFile, err) - } - for i, line := range strings.Split(string(contents), "\n") { - // Skip comments, empty lines or lines with tabs - if strings.HasPrefix(line, "#") || strings.HasPrefix(line, " ") || - strings.HasPrefix(line, "\t") || len(line) == 0 { - continue - } - - logrus.Debugf("config: [%d] '%s'", i, line) - parts := strings.SplitN(line, "=", 2) - if len(parts) != 2 { - return errors.Errorf("malformed key=value on line '%d'", i) - } - - if err 
:= os.Setenv(strings.TrimSpace(parts[0]), strings.TrimSpace(parts[1])); err != nil { - return errors.Wrapf(err, "while settings environ for '%s=%s'", parts[0], parts[1]) - } - } - return nil -} - -func validHashKeys(m map[string]gubernator.HashFunc) string { - var rs []string - for k, _ := range m { - rs = append(rs, k) - } - return strings.Join(rs, ",") -} - -func validHash64Keys(m map[string]gubernator.HashFunc64) string { - var rs []string - for k, _ := range m { - rs = append(rs, k) - } - return strings.Join(rs, ",") -} diff --git a/cmd/gubernator/main.go b/cmd/gubernator/main.go index afb06cb7..07021ccc 100644 --- a/cmd/gubernator/main.go +++ b/cmd/gubernator/main.go @@ -18,130 +18,53 @@ package main import ( "context" + "crypto/tls" + "crypto/x509" + "flag" + "fmt" + "io/ioutil" "net" - "net/http" "os" "os/signal" + "runtime" "strconv" "strings" - "github.com/grpc-ecosystem/grpc-gateway/runtime" + etcd "github.com/coreos/etcd/clientv3" + "github.com/davecgh/go-spew/spew" "github.com/mailgun/gubernator" - "github.com/mailgun/holster/etcdutil" - "github.com/mailgun/holster/v3/syncutil" - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promhttp" + "github.com/mailgun/holster/v3/clock" + "github.com/mailgun/holster/v3/setter" + "github.com/mailgun/holster/v3/slice" + "github.com/pkg/errors" + "github.com/segmentio/fasthash/fnv1" + "github.com/segmentio/fasthash/fnv1a" "github.com/sirupsen/logrus" - "google.golang.org/grpc" + "k8s.io/klog" ) var log = logrus.WithField("category", "server") var Version = "dev-build" func main() { - var wg syncutil.WaitGroup - var conf ServerConfig - var err error + var configFile string + + logrus.Infof("Gubernator %s (%s/%s)", Version, runtime.GOARCH, runtime.GOOS) + flags := flag.NewFlagSet("gubernator", flag.ContinueOnError) + flags.StringVar(&configFile, "config", "", "yaml config file") + flags.BoolVar(&gubernator.DebugEnabled, "debug", false, "enable debug") + checkErr(flags.Parse(os.Args[1:]), "while parsing flags") // Read our config from the environment or optional environment config file - conf, err = confFromEnv() + conf, err := confFromFile(configFile) checkErr(err, "while getting config") - // The LRU cache we store rate limits in - cache := gubernator.NewLRUCache(conf.CacheSize) - - // cache also implements prometheus.Collector interface - prometheus.MustRegister(cache) - - // Handler to collect duration and API access metrics for GRPC - statsHandler := gubernator.NewGRPCStatsHandler() - - // New GRPC server - grpcSrv := grpc.NewServer( - grpc.StatsHandler(statsHandler), - grpc.MaxRecvMsgSize(1024*1024)) - - // Registers a new gubernator instance with the GRPC server - guber, err := gubernator.New(gubernator.Config{ - LocalPicker: conf.Picker, - GRPCServer: grpcSrv, - Cache: cache, - DataCenter: conf.DataCenter, - }) - checkErr(err, "while creating new gubernator instance") - - // guber instance also implements prometheus.Collector interface - prometheus.MustRegister(guber) - - // Start serving GRPC Requests - wg.Go(func() { - listener, err := net.Listen("tcp", conf.GRPCListenAddress) - checkErr(err, "while starting GRPC listener") - - log.Infof("Gubernator Listening on %s ...", conf.GRPCListenAddress) - checkErr(grpcSrv.Serve(listener), "while starting GRPC server") - }) - - var pool gubernator.PoolInterface - - if conf.K8PoolConf.Enabled { - // Source our list of peers from kubernetes endpoint API - conf.K8PoolConf.OnUpdate = guber.SetPeers - pool, err = 
gubernator.NewK8sPool(conf.K8PoolConf) - checkErr(err, "while querying kubernetes API") - - } else if conf.MemberlistPoolConf.Enabled { - gubernatorPort, err := strconv.Atoi(strings.Split(conf.GRPCListenAddress, ":")[1]) - checkErr(err, "while converting gubernator port to int") - - // Register peer on memberlist - pool, err = gubernator.NewMemberlistPool(gubernator.MemberlistPoolConfig{ - AdvertiseAddress: conf.MemberlistPoolConf.AdvertiseAddress, - AdvertisePort: conf.MemberlistPoolConf.AdvertisePort, - KnownNodes: conf.MemberlistPoolConf.KnownNodes, - LoggerOutput: logrus.WithField("category", "memberlist").Writer(), - DataCenter: conf.DataCenter, - GubernatorPort: gubernatorPort, - OnUpdate: guber.SetPeers, - }) - checkErr(err, "while creating memberlist") - - } else { - // Register ourselves with other peers via ETCD - etcdClient, err := etcdutil.NewClient(&conf.EtcdConf) - checkErr(err, "while connecting to etcd") - - pool, err = gubernator.NewEtcdPool(gubernator.EtcdPoolConfig{ - AdvertiseAddress: conf.EtcdAdvertiseAddress, - OnUpdate: guber.SetPeers, - Client: etcdClient, - BaseKey: conf.EtcdKeyPrefix, - }) - checkErr(err, "while registering with ETCD pool") - } - - ctx, cancel := context.WithCancel(context.Background()) + ctx, cancel := context.WithTimeout(context.Background(), clock.Second*10) defer cancel() - // Setup an JSON Gateway API for our GRPC methods - gateway := runtime.NewServeMux() - err = gubernator.RegisterV1HandlerFromEndpoint(ctx, gateway, - conf.EtcdAdvertiseAddress, []grpc.DialOption{grpc.WithInsecure()}) - checkErr(err, "while registering GRPC gateway handler") - - // Serve the JSON Gateway and metrics handlers via standard HTTP/1 - mux := http.NewServeMux() - mux.Handle("/metrics", promhttp.Handler()) - mux.Handle("/", gateway) - httpSrv := &http.Server{Addr: conf.GRPCListenAddress, Handler: mux} - - wg.Go(func() { - listener, err := net.Listen("tcp", conf.HTTPListenAddress) - checkErr(err, "while starting HTTP listener") - - log.Infof("HTTP Gateway Listening on %s ...", conf.HTTPListenAddress) - checkErr(httpSrv.Serve(listener), "while starting HTTP server") - }) + // Start the daemon + daemon, err := gubernator.SpawnDaemon(ctx, conf) + checkErr(err, "while spawning daemon") // Wait here for signals to clean up our mess c := make(chan os.Signal, 1) @@ -149,19 +72,318 @@ func main() { for sig := range c { if sig == os.Interrupt { log.Info("caught interrupt; user requested premature exit") - pool.Close() - httpSrv.Shutdown(ctx) - grpcSrv.GracefulStop() - wg.Stop() - statsHandler.Close() + daemon.Close() os.Exit(0) } } } +func confFromFile(configFile string) (gubernator.DaemonConfig, error) { + var conf gubernator.DaemonConfig + + // in order to prevent logging to /tmp by k8s.io/client-go + // and other kubernetes related dependencies which are using + // klog (https://github.com/kubernetes/klog), we need to + // initialize klog in the way it prints to stderr only. 
+ klog.InitFlags(nil) + flag.Set("logtostderr", "true") + + setter.SetDefault(&gubernator.DebugEnabled, getEnvBool("GUBER_DEBUG")) + if gubernator.DebugEnabled { + logrus.SetLevel(logrus.DebugLevel) + logrus.Debug("Debug enabled") + } + + if configFile != "" { + log.Infof("Loading env config: %s", configFile) + if err := fromEnvFile(configFile); err != nil { + return conf, err + } + } + + // Main config + setter.SetDefault(&conf.GRPCListenAddress, os.Getenv("GUBER_GRPC_ADDRESS"), "localhost:81") + setter.SetDefault(&conf.HTTPListenAddress, os.Getenv("GUBER_HTTP_ADDRESS"), "localhost:80") + setter.SetDefault(&conf.CacheSize, getEnvInteger("GUBER_CACHE_SIZE"), 50_000) + setter.SetDefault(&conf.DataCenter, os.Getenv("GUBER_DATA_CENTER"), "") + + setter.SetDefault(&conf.AdvertiseAddress, os.Getenv("GUBER_ADVERTISE_ADDRESS"), conf.GRPCListenAddress) + + advAddr, advPort, err := net.SplitHostPort(conf.AdvertiseAddress) + if err != nil { + return conf, errors.Wrap(err, "GUBER_ADVERTISE_ADDRESS is invalid; expected format is `address:port`") + } + advAddr, err = gubernator.ResolveHostIP(advAddr) + if err != nil { + return conf, errors.Wrap(err, "failed to discover host ip for GUBER_ADVERTISE_ADDRESS") + } + conf.AdvertiseAddress = net.JoinHostPort(advAddr, advPort) + + // Behaviors + setter.SetDefault(&conf.Behaviors.BatchTimeout, getEnvDuration("GUBER_BATCH_TIMEOUT")) + setter.SetDefault(&conf.Behaviors.BatchLimit, getEnvInteger("GUBER_BATCH_LIMIT")) + setter.SetDefault(&conf.Behaviors.BatchWait, getEnvDuration("GUBER_BATCH_WAIT")) + + setter.SetDefault(&conf.Behaviors.GlobalTimeout, getEnvDuration("GUBER_GLOBAL_TIMEOUT")) + setter.SetDefault(&conf.Behaviors.GlobalBatchLimit, getEnvInteger("GUBER_GLOBAL_BATCH_LIMIT")) + setter.SetDefault(&conf.Behaviors.GlobalSyncWait, getEnvDuration("GUBER_GLOBAL_SYNC_WAIT")) + + setter.SetDefault(&conf.Behaviors.MultiRegionTimeout, getEnvDuration("GUBER_MULTI_REGION_TIMEOUT")) + setter.SetDefault(&conf.Behaviors.MultiRegionBatchLimit, getEnvInteger("GUBER_MULTI_REGION_BATCH_LIMIT")) + setter.SetDefault(&conf.Behaviors.MultiRegionSyncWait, getEnvDuration("GUBER_MULTI_REGION_SYNC_WAIT")) + + choices := []string{"member-list", "k8s", "etcd"} + setter.SetDefault(&conf.PeerDiscoveryType, os.Getenv("GUBER_PEER_DISCOVERY_TYPE"), "member-list") + if !slice.ContainsString(conf.PeerDiscoveryType, choices, nil) { + return conf, fmt.Errorf("GUBER_PEER_DISCOVERY_TYPE is invalid; choices are [%s]`", strings.Join(choices, ",")) + } + + // ETCD Config + setter.SetDefault(&conf.EtcdPoolConf.KeyPrefix, os.Getenv("GUBER_ETCD_KEY_PREFIX"), "/gubernator-peers") + setter.SetDefault(&conf.EtcdPoolConf.EtcdConfig, &etcd.Config{}) + setter.SetDefault(&conf.EtcdPoolConf.EtcdConfig.Endpoints, getEnvSlice("GUBER_ETCD_ENDPOINTS"), []string{"localhost:2379"}) + setter.SetDefault(&conf.EtcdPoolConf.EtcdConfig.DialTimeout, getEnvDuration("GUBER_ETCD_DIAL_TIMEOUT"), clock.Second*5) + setter.SetDefault(&conf.EtcdPoolConf.EtcdConfig.Username, os.Getenv("GUBER_ETCD_USER")) + setter.SetDefault(&conf.EtcdPoolConf.EtcdConfig.Password, os.Getenv("GUBER_ETCD_PASSWORD")) + setter.SetDefault(&conf.EtcdPoolConf.AdvertiseAddress, os.Getenv("GUBER_ETCD_ADVERTISE_ADDRESS"), conf.AdvertiseAddress) + setter.SetDefault(&conf.EtcdPoolConf.DataCenter, os.Getenv("GUBER_ETCD_DATA_CENTER"), conf.DataCenter) + + setter.SetDefault(&conf.MemberListPoolConf.AdvertiseAddress, os.Getenv("GUBER_MEMBERLIST_ADVERTISE_ADDRESS"), conf.AdvertiseAddress) + setter.SetDefault(&conf.MemberListPoolConf.MemberListAddress, 
os.Getenv("GUBER_MEMBERLIST_ADDRESS"), fmt.Sprintf("%s:7946", advAddr)) + setter.SetDefault(&conf.MemberListPoolConf.KnownNodes, getEnvSlice("GUBER_MEMBERLIST_KNOWN_NODES"), []string{}) + setter.SetDefault(&conf.MemberListPoolConf.DataCenter, conf.DataCenter) + + // Kubernetes Config + setter.SetDefault(&conf.K8PoolConf.Namespace, os.Getenv("GUBER_K8S_NAMESPACE"), "default") + conf.K8PoolConf.PodIP = os.Getenv("GUBER_K8S_POD_IP") + conf.K8PoolConf.PodPort = os.Getenv("GUBER_K8S_POD_PORT") + conf.K8PoolConf.Selector = os.Getenv("GUBER_K8S_ENDPOINTS_SELECTOR") + + // PeerPicker Config + if pp := os.Getenv("GUBER_PEER_PICKER"); pp != "" { + var replicas int + var hash string + + switch pp { + case "consistent-hash": + setter.SetDefault(&hash, os.Getenv("GUBER_PEER_PICKER_HASH"), "fnv1a") + hashFuncs := map[string]gubernator.HashFunc{ + "fnv1a": fnv1a.HashBytes32, + "fnv1": fnv1.HashBytes32, + "crc32": nil, + } + fn, ok := hashFuncs[hash] + if !ok { + return conf, errors.Errorf("'GUBER_PEER_PICKER_HASH=%s' is invalid; choices are [%s]", + hash, validHashKeys(hashFuncs)) + } + conf.Picker = gubernator.NewConsistentHash(fn) + + case "replicated-hash": + setter.SetDefault(&replicas, getEnvInteger("GUBER_REPLICATED_HASH_REPLICAS"), gubernator.DefaultReplicas) + conf.Picker = gubernator.NewReplicatedConsistentHash(nil, replicas) + setter.SetDefault(&hash, os.Getenv("GUBER_PEER_PICKER_HASH"), "fnv1a") + hashFuncs := map[string]gubernator.HashFunc64{ + "fnv1a": fnv1a.HashBytes64, + "fnv1": fnv1.HashBytes64, + } + fn, ok := hashFuncs[hash] + if !ok { + return conf, errors.Errorf("'GUBER_PEER_PICKER_HASH=%s' is invalid; choices are [%s]", + hash, validHash64Keys(hashFuncs)) + } + conf.Picker = gubernator.NewReplicatedConsistentHash(fn, replicas) + default: + return conf, errors.Errorf("'GUBER_PEER_PICKER=%s' is invalid; choices are ['replicated-hash', 'consistent-hash']", pp) + } + } + + if anyHasPrefix("GUBER_K8S_", os.Environ()) { + logrus.Debug("K8s peer pool config found") + if conf.K8PoolConf.Selector == "" { + return conf, errors.New("when using k8s for peer discovery, you MUST provide a " + + "`GUBER_K8S_ENDPOINTS_SELECTOR` to select the gubernator peers from the endpoints listing") + } + } + + if anyHasPrefix("GUBER_MEMBERLIST_", os.Environ()) { + logrus.Debug("Memberlist pool config found") + if len(conf.MemberListPoolConf.KnownNodes) == 0 { + return conf, errors.New("when using `member-list` for peer discovery, you MUST provide a " + + "hostname of a known host in the cluster via `GUBER_MEMBERLIST_KNOWN_NODES`") + } + } + + if anyHasPrefix("GUBER_ETCD_", os.Environ()) { + logrus.Debug("ETCD peer pool config found") + } + + // If env contains any TLS configuration + if anyHasPrefix("GUBER_ETCD_TLS_", os.Environ()) { + if err := setupTLS(conf.EtcdPoolConf.EtcdConfig); err != nil { + return conf, err + } + } + + if gubernator.DebugEnabled { + spew.Dump(conf) + } + + return conf, nil +} + func checkErr(err error, msg string) { if err != nil { log.WithError(err).Error(msg) os.Exit(1) } } + +func setupTLS(conf *etcd.Config) error { + var tlsCertFile, tlsKeyFile, tlsCAFile string + + // set `GUBER_ETCD_TLS_ENABLE` and this line will + // create a TLS config with no config. 
+ setter.SetDefault(&conf.TLS, &tls.Config{}) + + setter.SetDefault(&tlsCertFile, os.Getenv("GUBER_ETCD_TLS_CERT")) + setter.SetDefault(&tlsKeyFile, os.Getenv("GUBER_ETCD_TLS_KEY")) + setter.SetDefault(&tlsCAFile, os.Getenv("GUBER_ETCD_TLS_CA")) + + // If the CA file was provided + if tlsCAFile != "" { + setter.SetDefault(&conf.TLS, &tls.Config{}) + + var certPool *x509.CertPool = nil + if pemBytes, err := ioutil.ReadFile(tlsCAFile); err == nil { + certPool = x509.NewCertPool() + certPool.AppendCertsFromPEM(pemBytes) + } else { + return errors.Wrapf(err, "while loading cert CA file '%s'", tlsCAFile) + } + setter.SetDefault(&conf.TLS.RootCAs, certPool) + conf.TLS.InsecureSkipVerify = false + } + + // If the cert and key files are provided attempt to load them + if tlsCertFile != "" && tlsKeyFile != "" { + tlsCert, err := tls.LoadX509KeyPair(tlsCertFile, tlsKeyFile) + if err != nil { + return errors.Wrapf(err, "while loading cert '%s' and key file '%s'", + tlsCertFile, tlsKeyFile) + } + setter.SetDefault(&conf.TLS.Certificates, []tls.Certificate{tlsCert}) + } + + // If no other TLS config is provided this will force connecting with TLS, + // without cert verification + if os.Getenv("GUBER_ETCD_TLS_SKIP_VERIFY") != "" { + setter.SetDefault(&conf.TLS, &tls.Config{}) + conf.TLS.InsecureSkipVerify = true + } + return nil +} + +func anyHasPrefix(prefix string, items []string) bool { + for _, i := range items { + if strings.HasPrefix(i, prefix) { + return true + } + } + return false +} + +func getEnvBool(name string) bool { + v := os.Getenv(name) + if v == "" { + return false + } + b, err := strconv.ParseBool(v) + if err != nil { + log.WithError(err).Errorf("while parsing '%s' as an boolean", name) + return false + } + return b +} + +func getEnvInteger(name string) int { + v := os.Getenv(name) + if v == "" { + return 0 + } + i, err := strconv.ParseInt(v, 10, 64) + if err != nil { + log.WithError(err).Errorf("while parsing '%s' as an integer", name) + return 0 + } + return int(i) +} + +func getEnvDuration(name string) clock.Duration { + v := os.Getenv(name) + if v == "" { + return 0 + } + d, err := clock.ParseDuration(v) + if err != nil { + log.WithError(err).Errorf("while parsing '%s' as a duration", name) + return 0 + } + return d +} + +func getEnvSlice(name string) []string { + v := os.Getenv(name) + if v == "" { + return nil + } + return strings.Split(v, ",") +} + +// Take values from a file in the format `GUBER_CONF_ITEM=my-value` and put them into the environment +// lines that begin with `#` are ignored +func fromEnvFile(configFile string) error { + fd, err := os.Open(configFile) + if err != nil { + return fmt.Errorf("while opening config file: %s", err) + } + + contents, err := ioutil.ReadAll(fd) + if err != nil { + return fmt.Errorf("while reading config file '%s': %s", configFile, err) + } + for i, line := range strings.Split(string(contents), "\n") { + // Skip comments, empty lines or lines with tabs + if strings.HasPrefix(line, "#") || strings.HasPrefix(line, " ") || + strings.HasPrefix(line, "\t") || len(line) == 0 { + continue + } + + logrus.Debugf("config: [%d] '%s'", i, line) + parts := strings.SplitN(line, "=", 2) + if len(parts) != 2 { + return errors.Errorf("malformed key=value on line '%d'", i) + } + + if err := os.Setenv(strings.TrimSpace(parts[0]), strings.TrimSpace(parts[1])); err != nil { + return errors.Wrapf(err, "while settings environ for '%s=%s'", parts[0], parts[1]) + } + } + return nil +} + +func validHashKeys(m map[string]gubernator.HashFunc) string { + var rs 
[]string + for k, _ := range m { + rs = append(rs, k) + } + return strings.Join(rs, ",") +} + +func validHash64Keys(m map[string]gubernator.HashFunc64) string { + var rs []string + for k, _ := range m { + rs = append(rs, k) + } + return strings.Join(rs, ",") +} diff --git a/config.go b/config.go index babe974c..1b9a67e8 100644 --- a/config.go +++ b/config.go @@ -21,6 +21,7 @@ import ( "time" "github.com/mailgun/holster/v3/setter" + "github.com/sirupsen/logrus" "google.golang.org/grpc" ) @@ -57,6 +58,9 @@ type Config struct { // deciding who we should immediately connect too for our local picker. Should remain empty if not // using multi data center support. DataCenter string + + // (Optional) Logger to be used when + Logger logrus.FieldLogger } type BehaviorConfig struct { @@ -82,6 +86,24 @@ type BehaviorConfig struct { MultiRegionBatchLimit int } +type PeerInfo struct { + // (Optional) The name of the data center this peer is in. Leave blank if not using multi data center support. + DataCenter string + // (Optional) The http address:port of the peer + HTTPAddress string + // (Required) The grpc address:port of the peer + GRPCAddress string + // (Optional) Is true if PeerInfo is for this instance of gubernator + IsOwner bool +} + +// Returns the hash key used to identify this peer in the Picker. +func (p PeerInfo) HashKey() string { + return p.GRPCAddress +} + +type UpdateFunc func([]PeerInfo) + func (c *Config) SetDefaults() error { setter.SetDefault(&c.Behaviors.BatchTimeout, time.Millisecond*500) setter.SetDefault(&c.Behaviors.BatchLimit, maxBatchSize) @@ -95,7 +117,7 @@ func (c *Config) SetDefaults() error { setter.SetDefault(&c.Behaviors.MultiRegionBatchLimit, maxBatchSize) setter.SetDefault(&c.Behaviors.MultiRegionSyncWait, time.Second) - setter.SetDefault(&c.LocalPicker, NewConsistantHash(nil)) + setter.SetDefault(&c.LocalPicker, NewReplicatedConsistentHash(nil, DefaultReplicas)) setter.SetDefault(&c.RegionPicker, NewRegionPicker(nil)) setter.SetDefault(&c.Cache, NewLRUCache(0)) diff --git a/daemon.go b/daemon.go new file mode 100644 index 00000000..1767685c --- /dev/null +++ b/daemon.go @@ -0,0 +1,315 @@ +/* +Copyright 2018-2020 Mailgun Technologies Inc + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package gubernator + +import ( + "context" + "net" + "net/http" + "strings" + + "github.com/grpc-ecosystem/grpc-gateway/runtime" + "github.com/mailgun/holster/v3/etcdutil" + "github.com/mailgun/holster/v3/setter" + "github.com/mailgun/holster/v3/syncutil" + "github.com/pkg/errors" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promhttp" + "github.com/sirupsen/logrus" + "google.golang.org/grpc" +) + +var DebugEnabled = false + +type DaemonConfig struct { + // (Required) The `address:port` that will accept GRPC requests + GRPCListenAddress string + + // (Required) The `address:port` that will accept HTTP requests + HTTPListenAddress string + + // (Optional) The `address:port` that is advertised to other Gubernator peers. 
+ // Defaults to `GRPCListenAddress` + AdvertiseAddress string + + // (Optional) The number of items in the cache. Defaults to 50,000 + CacheSize int + + // (Optional) Configure how behaviours behave + Behaviors BehaviorConfig + + // (Optional) Identifies the datacenter this instance is running in. For + // use with multi-region support + DataCenter string + + // (Optional) Which pool to use when discovering other Gubernator peers + // Valid options are [etcd, k8s, member-list] (Defaults to 'member-list') + PeerDiscoveryType string + + // (Optional) Etcd configuration used for peer discovery + EtcdPoolConf EtcdPoolConfig + + // (Optional) K8s configuration used for peer discovery + K8PoolConf K8sPoolConfig + + // (Optional) Member list configuration used for peer discovery + MemberListPoolConf MemberListPoolConfig + + // (Optional) The PeerPicker as selected by `GUBER_PEER_PICKER` + Picker PeerPicker + + // (Optional) A Logger which implements the declared logger interface (typically *logrus.Entry) + Logger logrus.FieldLogger +} + +type Daemon struct { + GRPCListener net.Listener + HTTPListener net.Listener + V1Server *V1Instance + + log logrus.FieldLogger + pool PoolInterface + conf DaemonConfig + httpSrv *http.Server + grpcSrv *grpc.Server + wg syncutil.WaitGroup + statsHandler *GRPCStatsHandler + promRegister *prometheus.Registry + gwCancel context.CancelFunc +} + +// SpawnDaemon starts a new gubernator daemon according to the provided DaemonConfig. +// This function will block until the daemon responds to connections as specified +// by GRPCListenAddress and HTTPListenAddress +func SpawnDaemon(ctx context.Context, conf DaemonConfig) (*Daemon, error) { + s := Daemon{ + log: conf.Logger, + conf: conf, + } + setter.SetDefault(&s.log, logrus.WithField("category", "gubernator")) + + if err := s.Start(ctx); err != nil { + return nil, err + } + return &s, nil +} + +func (s *Daemon) Start(ctx context.Context) error { + var err error + + // The LRU cache we store rate limits in + cache := NewLRUCache(s.conf.CacheSize) + + // cache also implements prometheus.Collector interface + s.promRegister = prometheus.NewRegistry() + s.promRegister.Register(cache) + + // Handler to collect duration and API access metrics for GRPC + s.statsHandler = NewGRPCStatsHandler() + s.promRegister.Register(s.statsHandler) + + // New GRPC server + s.grpcSrv = grpc.NewServer( + grpc.StatsHandler(s.statsHandler), + grpc.MaxRecvMsgSize(1024*1024)) + + // Registers a new gubernator instance with the GRPC server + s.V1Server, err = NewV1Instance(Config{ + DataCenter: s.conf.DataCenter, + LocalPicker: s.conf.Picker, + GRPCServer: s.grpcSrv, + Logger: s.log, + Cache: cache, + }) + if err != nil { + return errors.Wrap(err, "while creating new gubernator instance") + } + + // V1Server instance also implements prometheus.Collector interface + s.promRegister.Register(s.V1Server) + + s.GRPCListener, err = net.Listen("tcp", s.conf.GRPCListenAddress) + if err != nil { + return errors.Wrap(err, "while starting GRPC listener") + } + + // Start serving GRPC Requests + s.wg.Go(func() { + s.log.Infof("GRPC Listening on %s ...", s.conf.GRPCListenAddress) + if err := s.grpcSrv.Serve(s.GRPCListener); err != nil { + s.log.WithError(err).Error("while starting GRPC server") + } + }) + + switch s.conf.PeerDiscoveryType { + case "k8s": + // Source our list of peers from kubernetes endpoint API + s.conf.K8PoolConf.OnUpdate = s.V1Server.SetPeers + s.pool, err = NewK8sPool(s.conf.K8PoolConf) + if err != nil { + return errors.Wrap(err, "while 
querying kubernetes API") + } + case "etcd": + s.conf.EtcdPoolConf.OnUpdate = s.V1Server.SetPeers + // Register ourselves with other peers via ETCD + s.conf.EtcdPoolConf.Client, err = etcdutil.NewClient(s.conf.EtcdPoolConf.EtcdConfig) + if err != nil { + return errors.Wrap(err, "while connecting to etcd") + } + + s.pool, err = NewEtcdPool(s.conf.EtcdPoolConf) + if err != nil { + return errors.Wrap(err, "while creating etcd pool") + } + case "member-list": + s.conf.MemberListPoolConf.OnUpdate = s.V1Server.SetPeers + s.conf.MemberListPoolConf.Logger = s.log + + // Register peer on member list + s.pool, err = NewMemberListPool(ctx, s.conf.MemberListPoolConf) + if err != nil { + return errors.Wrap(err, "while creating member list pool") + } + } + + // Setup an JSON Gateway API for our GRPC methods + gateway := runtime.NewServeMux() + var gwCtx context.Context + gwCtx, s.gwCancel = context.WithCancel(context.Background()) + err = RegisterV1HandlerFromEndpoint(gwCtx, gateway, + s.conf.GRPCListenAddress, []grpc.DialOption{grpc.WithInsecure()}) + if err != nil { + return errors.Wrap(err, "while registering GRPC gateway handler") + } + + // Serve the JSON Gateway and metrics handlers via standard HTTP/1 + mux := http.NewServeMux() + + mux.Handle("/metrics", promhttp.InstrumentMetricHandler( + s.promRegister, promhttp.HandlerFor(s.promRegister, promhttp.HandlerOpts{}), + )) + mux.Handle("/", gateway) + s.httpSrv = &http.Server{Addr: s.conf.HTTPListenAddress, Handler: mux} + + s.HTTPListener, err = net.Listen("tcp", s.conf.HTTPListenAddress) + if err != nil { + return errors.Wrap(err, "while starting HTTP listener") + } + + s.wg.Go(func() { + s.log.Infof("HTTP Gateway Listening on %s ...", s.conf.HTTPListenAddress) + if err := s.httpSrv.Serve(s.HTTPListener); err != nil { + if err != http.ErrServerClosed { + s.log.WithError(err).Error("while starting HTTP server") + } + } + }) + + // Validate we can reach the GRPC and HTTP endpoints before returning + if err := WaitForConnect(ctx, []string{s.conf.HTTPListenAddress, s.conf.GRPCListenAddress}); err != nil { + return err + } + + return nil +} + +// Close gracefully closes all server connections and listening sockets +func (s *Daemon) Close() { + if s.httpSrv == nil { + return + } + + if s.pool != nil { + s.pool.Close() + } + + s.log.Infof("HTTP Gateway close for %s ...", s.conf.HTTPListenAddress) + s.httpSrv.Shutdown(context.Background()) + s.log.Infof("GRPC close for %s ...", s.conf.GRPCListenAddress) + s.grpcSrv.GracefulStop() + s.wg.Stop() + s.statsHandler.Close() + s.gwCancel() + s.httpSrv = nil + s.grpcSrv = nil +} + +// SetPeers sets the peers for this daemon +func (s *Daemon) SetPeers(in []PeerInfo) { + peers := make([]PeerInfo, len(in)) + copy(peers, in) + + for i, p := range peers { + if s.conf.GRPCListenAddress == p.GRPCAddress { + peers[i].IsOwner = true + } + } + s.V1Server.SetPeers(peers) +} + +// Config returns the current config for this Daemon +func (s *Daemon) Config() DaemonConfig { + return s.conf +} + +// Peers returns the peers this daemon knows about +func (s *Daemon) Peers() []PeerInfo { + var peers []PeerInfo + for _, client := range s.V1Server.GetPeerList() { + peers = append(peers, client.PeerInfo()) + } + return peers +} + +// WaitForConnect returns nil if the list of addresses is listening for connections; will block until context is cancelled. 
+func WaitForConnect(ctx context.Context, addresses []string) error { + var d net.Dialer + var errs []error + for { + errs = nil + for _, addr := range addresses { + if addr == "" { + continue + } + + conn, err := d.DialContext(ctx, "tcp", addr) + if err != nil { + errs = append(errs, err) + continue + } + conn.Close() + } + + if len(errs) == 0 { + break + } + + select { + case <-ctx.Done(): + return ctx.Err() + } + } + + if len(errs) != 0 { + var errStrings []string + for _, err := range errs { + errStrings = append(errStrings, err.Error()) + } + return errors.New(strings.Join(errStrings, "\n")) + } + return nil +} diff --git a/docker-compose-etcd.yaml b/docker-compose-etcd.yaml new file mode 100644 index 00000000..815af3fb --- /dev/null +++ b/docker-compose-etcd.yaml @@ -0,0 +1,101 @@ +version: '3' +services: + etcd: + image: quay.io/coreos/etcd:v3.3.10 + command: > + /usr/local/bin/etcd + -name etcd0 + -advertise-client-urls http://localhost:2379 + -listen-client-urls http://0.0.0.0:2379 + -initial-advertise-peer-urls http://0.0.0.0:2380 + -listen-peer-urls http://0.0.0.0:2380 + -initial-cluster-token etcd-cluster-1 + -initial-cluster etcd0=http://0.0.0.0:2380 + -initial-cluster-state new + ports: + - "2379:2379" + + gubernator-1: + image: thrawn01/gubernator:latest + command: "/gubernator" + environment: + # The address GRPC requests will listen on + - GUBER_GRPC_ADDRESS=0.0.0.0:81 + # The address HTTP requests will listen on + - GUBER_HTTP_ADDRESS=0.0.0.0:80 + # Choose the etcd peer discovery type + - GUBER_PEER_DISCOVERY_TYPE=etcd + # A comma separated list of etcd nodes + - GUBER_ETCD_ENDPOINTS=etcd:2379 + # The key prefix used in the etcd store + - GUBER_ETCD_KEY_PREFIX=/gubernator-docker + # The address that is advertised to other peers + - GUBER_ETCD_ADVERTISE_ADDRESS=gubernator-1:81 + #- GUBER_DATA_CENTER=us-east-1 + ports: + - "9081:81" + - "9080:80" + + gubernator-2: + image: thrawn01/gubernator:latest + command: "/gubernator" + environment: + # The address GRPC requests will listen on + - GUBER_GRPC_ADDRESS=0.0.0.0:81 + # The address HTTP requests will listen on + - GUBER_HTTP_ADDRESS=0.0.0.0:80 + # Choose the etcd peer discovery type + - GUBER_PEER_DISCOVERY_TYPE=etcd + # A comma separated list of etcd nodes + - GUBER_ETCD_ENDPOINTS=etcd:2379 + # The key prefix used in the etcd store + - GUBER_ETCD_KEY_PREFIX=/gubernator-docker + # The address that is advertised to other peers + - GUBER_ETCD_ADVERTISE_ADDRESS=gubernator-2:81 + #- GUBER_DATA_CENTER=us-east-1 + ports: + - "9181:81" + - "9180:80" + + gubernator-3: + image: thrawn01/gubernator:latest + command: "/gubernator" + environment: + # The address GRPC requests will listen on + - GUBER_GRPC_ADDRESS=0.0.0.0:81 + # The address HTTP requests will listen on + - GUBER_HTTP_ADDRESS=0.0.0.0:80 + # Choose the etcd peer discovery type + - GUBER_PEER_DISCOVERY_TYPE=etcd + # A comma separated list of etcd nodes + - GUBER_ETCD_ENDPOINTS=etcd:2379 + # The key prefix used in the etcd store + - GUBER_ETCD_KEY_PREFIX=/gubernator-docker + # The address that is advertised to other peers + - GUBER_ETCD_ADVERTISE_ADDRESS=gubernator-3:81 + #- GUBER_DATA_CENTER=us-west-2 + ports: + - "9281:81" + - "9280:80" + + gubernator-4: + image: thrawn01/gubernator:latest + command: "/gubernator" + environment: + - GUBER_DEBUG=true + # The address GRPC requests will listen on + - GUBER_GRPC_ADDRESS=0.0.0.0:81 + # The address HTTP requests will listen on + - GUBER_HTTP_ADDRESS=0.0.0.0:80 + # Choose the etcd peer discovery type + - 
GUBER_PEER_DISCOVERY_TYPE=etcd + # A comma separated list of etcd nodes + - GUBER_ETCD_ENDPOINTS=etcd:2379 + # The key prefix used in the etcd store + - GUBER_ETCD_KEY_PREFIX=/gubernator-docker + # The address that is advertised to other peers + - GUBER_ADVERTISE_ADDRESS=gubernator-4:81 + #- GUBER_DATA_CENTER=us-west-2 + ports: + - "9381:81" + - "9380:80" diff --git a/docker-compose.yaml b/docker-compose.yaml index 04413e63..cac65026 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -8,14 +8,16 @@ services: - GUBER_GRPC_ADDRESS=0.0.0.0:81 # The address HTTP requests will listen on - GUBER_HTTP_ADDRESS=0.0.0.0:80 + # The address that is advertised to other peers + - GUBER_ADVERTISE_ADDRESS=gubernator-1:81 # Max size of the cache; The cache size will never grow beyond this size. - GUBER_CACHE_SIZE=50000 # A comma separated list of known gubernator nodes - - GUBER_MEMBERLIST_KNOWN_NODES=gubernator-1,gubernator-2,gubernator-3,gubernator-4 - - GUBER_DATA_CENTER=us-east-1 + - GUBER_MEMBERLIST_KNOWN_NODES=gubernator-1 + #- GUBER_DATA_CENTER=us-east-1 ports: - - "8081:81" - - "8080:80" + - "9081:81" + - "9080:80" gubernator-2: image: thrawn01/gubernator:latest @@ -25,14 +27,16 @@ services: - GUBER_GRPC_ADDRESS=0.0.0.0:81 # The address HTTP requests will listen on - GUBER_HTTP_ADDRESS=0.0.0.0:80 + # The address that is advertised to other peers + - GUBER_ADVERTISE_ADDRESS=gubernator-2:81 # Max size of the cache; The cache size will never grow beyond this size. - GUBER_CACHE_SIZE=50000 # A comma separated list of known gubernator nodes - - GUBER_MEMBERLIST_KNOWN_NODES=gubernator-1,gubernator-2,gubernator-3,gubernator-4 - - GUBER_DATA_CENTER=us-east-1 + - GUBER_MEMBERLIST_KNOWN_NODES=gubernator-1 + #- GUBER_DATA_CENTER=us-east-1 ports: - - "8181:81" - - "8180:80" + - "9181:81" + - "9180:80" gubernator-3: image: thrawn01/gubernator:latest @@ -42,28 +46,33 @@ services: - GUBER_GRPC_ADDRESS=0.0.0.0:81 # The address HTTP requests will listen on - GUBER_HTTP_ADDRESS=0.0.0.0:80 + # The address that is advertised to other peers + - GUBER_ADVERTISE_ADDRESS=gubernator-3:81 # Max size of the cache; The cache size will never grow beyond this size. - GUBER_CACHE_SIZE=50000 # A comma separated list of known gubernator nodes - - GUBER_MEMBERLIST_KNOWN_NODES=gubernator-1,gubernator-2,gubernator-3,gubernator-4 - - GUBER_DATA_CENTER=us-west-2 + - GUBER_MEMBERLIST_KNOWN_NODES=gubernator-1 + #- GUBER_DATA_CENTER=us-west-2 ports: - - "8281:81" - - "8280:80" + - "9281:81" + - "9280:80" gubernator-4: image: thrawn01/gubernator:latest command: "/gubernator" environment: + - GUBER_DEBUG=true # The address GRPC requests will listen on - GUBER_GRPC_ADDRESS=0.0.0.0:81 # The address HTTP requests will listen on - GUBER_HTTP_ADDRESS=0.0.0.0:80 + # The address that is advertised to other peers + - GUBER_ADVERTISE_ADDRESS=gubernator-4:81 # Max size of the cache; The cache size will never grow beyond this size. 
- GUBER_CACHE_SIZE=50000 - # A Comma separate list of known gubernator nodes - - GUBER_MEMBERLIST_KNOWN_NODES=gubernator-1,gubernator-2,gubernator-3,gubernator-4 - - GUBER_DATA_CENTER=us-west-2 + # A Comma separated list of known gubernator nodes + - GUBER_MEMBERLIST_KNOWN_NODES=gubernator-1 + #- GUBER_DATA_CENTER=us-west-2 ports: - - "8381:81" - - "8380:80" + - "9381:81" + - "9380:80" diff --git a/etcd.go b/etcd.go index b7d24f9a..b15632ba 100644 --- a/etcd.go +++ b/etcd.go @@ -18,37 +18,19 @@ package gubernator import ( "context" - "time" + "encoding/json" etcd "github.com/coreos/etcd/clientv3" + "github.com/mailgun/holster/v3/clock" "github.com/mailgun/holster/v3/setter" "github.com/mailgun/holster/v3/syncutil" "github.com/pkg/errors" "github.com/sirupsen/logrus" ) -type PeerInfo struct { - - // (Optional) The name of the data center this peer is in. Leave blank if not using multi data center support. - DataCenter string - - // (Required) The IP address of the peer which will field peer requests - Address string - - // (Optional) Is true if PeerInfo is for this instance of gubernator - IsOwner bool -} - -// Returns the hash key used to identify this peer in the Picker. -func (p PeerInfo) HashKey() string { - return p.Address -} - -type UpdateFunc func([]PeerInfo) - const ( - etcdTimeout = time.Second * 10 - backOffTimeout = time.Second * 5 + etcdTimeout = clock.Second * 10 + backOffTimeout = clock.Second * 5 leaseTTL = 30 defaultBaseKey = "/gubernator/peers/" ) @@ -58,28 +40,45 @@ type PoolInterface interface { } type EtcdPool struct { - peers map[string]struct{} + peers map[string]PeerInfo wg syncutil.WaitGroup ctx context.Context cancelCtx context.CancelFunc watchChan etcd.WatchChan - log *logrus.Entry + log logrus.FieldLogger watcher etcd.Watcher conf EtcdPoolConfig } type EtcdPoolConfig struct { + // (Required) The address etcd will advertise to other gubernator instances AdvertiseAddress string - BaseKey string - Client *etcd.Client - OnUpdate UpdateFunc + + // (Required) An etcd client currently connected to an etcd cluster + Client *etcd.Client + + // (Required) Called when the list of gubernators in the pool updates + OnUpdate UpdateFunc + + // (Optional) The etcd key prefix used when discovering other peers. 
Defaults to `/gubernator/peers/` + KeyPrefix string + + // (Optional) The etcd config used to connect to the etcd cluster + EtcdConfig *etcd.Config + + // (Optional) An interface through which logging will occur (Usually *logrus.Entry) + Logger logrus.FieldLogger + + // (Optional) The datacenter this instance belongs too + DataCenter string } func NewEtcdPool(conf EtcdPoolConfig) (*EtcdPool, error) { - setter.SetDefault(&conf.BaseKey, defaultBaseKey) + setter.SetDefault(&conf.KeyPrefix, defaultBaseKey) + setter.SetDefault(&conf.Logger, logrus.WithField("category", "gubernator")) if conf.AdvertiseAddress == "" { - return nil, errors.New("GUBER_ETCD_ADVERTISE_ADDRESS is required") + return nil, errors.New("AdvertiseAddress is required") } if conf.Client == nil { @@ -88,8 +87,8 @@ func NewEtcdPool(conf EtcdPoolConfig) (*EtcdPool, error) { ctx, cancel := context.WithCancel(context.Background()) pool := &EtcdPool{ - log: logrus.WithField("category", "gubernator-pool"), - peers: make(map[string]struct{}), + log: conf.Logger, + peers: make(map[string]PeerInfo), cancelCtx: cancel, conf: conf, ctx: ctx, @@ -128,15 +127,15 @@ func (e *EtcdPool) watchPeers() error { ready := make(chan struct{}) go func() { - e.watchChan = e.watcher.Watch(etcd.WithRequireLeader(e.ctx), e.conf.BaseKey, + e.watchChan = e.watcher.Watch(etcd.WithRequireLeader(e.ctx), e.conf.KeyPrefix, etcd.WithRev(revision), etcd.WithPrefix(), etcd.WithPrevKV()) close(ready) }() select { case <-ready: - e.log.Infof("watching for peer changes '%s' at revision %d", e.conf.BaseKey, revision) - case <-time.After(etcdTimeout): + e.log.Infof("watching for peer changes '%s' at revision %d", e.conf.KeyPrefix, revision) + case <-clock.After(etcdTimeout): return errors.New("timed out while waiting for watcher.Watch() to start") } return nil @@ -146,20 +145,32 @@ func (e *EtcdPool) collectPeers(revision *int64) error { ctx, cancel := context.WithTimeout(e.ctx, etcdTimeout) defer cancel() - resp, err := e.conf.Client.Get(ctx, e.conf.BaseKey, etcd.WithPrefix()) + resp, err := e.conf.Client.Get(ctx, e.conf.KeyPrefix, etcd.WithPrefix()) if err != nil { - return errors.Wrapf(err, "while fetching peer listing from '%s'", e.conf.BaseKey) + return errors.Wrapf(err, "while fetching peer listing from '%s'", e.conf.KeyPrefix) } // Collect all the peers for _, v := range resp.Kvs { - e.peers[string(v.Value)] = struct{}{} + p := e.unMarshallValue(v.Value) + e.peers[p.GRPCAddress] = p } e.callOnUpdate() return nil } +func (e *EtcdPool) unMarshallValue(v []byte) PeerInfo { + var p PeerInfo + + // for backward compatible with older gubernator versions + if err := json.Unmarshal(v, &p); err != nil { + e.log.WithError(err).Errorf("while unmarshalling peer info from key value") + return PeerInfo{GRPCAddress: string(v)} + } + return p +} + func (e *EtcdPool) watch() error { // Initialize watcher if err := e.watchPeers(); err != nil { @@ -183,7 +194,8 @@ func (e *EtcdPool) watch() error { case etcd.EventTypePut: if event.Kv != nil { e.log.Debugf("new peer [%s]", string(event.Kv.Value)) - e.peers[string(event.Kv.Value)] = struct{}{} + p := e.unMarshallValue(event.Kv.Value) + e.peers[p.GRPCAddress] = p } case etcd.EventTypeDelete: if event.PrevKv != nil { @@ -209,7 +221,7 @@ func (e *EtcdPool) watch() error { e.log.WithError(err). 
Error("while attempting to restart watch") select { - case <-time.After(backOffTimeout): + case <-clock.After(backOffTimeout): return true case <-done: return false @@ -222,9 +234,17 @@ func (e *EtcdPool) watch() error { } func (e *EtcdPool) register(name string) error { - instanceKey := e.conf.BaseKey + name + instanceKey := e.conf.KeyPrefix + name e.log.Infof("Registering peer '%s' with etcd", name) + b, err := json.Marshal(PeerInfo{ + GRPCAddress: e.conf.AdvertiseAddress, + DataCenter: e.conf.DataCenter, + }) + if err != nil { + return errors.Wrap(err, "while marshalling PeerInfo") + } + var keepAlive <-chan *etcd.LeaseKeepAliveResponse var lease *etcd.LeaseGrantResponse @@ -238,7 +258,7 @@ func (e *EtcdPool) register(name string) error { return errors.Wrapf(err, "during grant lease") } - _, err = e.conf.Client.Put(ctx, instanceKey, name, etcd.WithLease(lease.ID)) + _, err = e.conf.Client.Put(ctx, instanceKey, string(b), etcd.WithLease(lease.ID)) if err != nil { return errors.Wrap(err, "during put") } @@ -249,8 +269,7 @@ func (e *EtcdPool) register(name string) error { return nil } - var err error - var lastKeepAlive time.Time + var lastKeepAlive clock.Time // Attempt to register our instance with etcd if err = register(); err != nil { @@ -264,7 +283,7 @@ func (e *EtcdPool) register(name string) error { e.log.WithError(err). Error("while attempting to re-register peer") select { - case <-time.After(backOffTimeout): + case <-clock.After(backOffTimeout): return true case <-done: return false @@ -287,12 +306,12 @@ func (e *EtcdPool) register(name string) error { } // Ensure we are getting keep alive's regularly - if lastKeepAlive.Sub(time.Now()) > time.Second*leaseTTL { + if lastKeepAlive.Sub(clock.Now()) > clock.Second*leaseTTL { e.log.Warn("to long between keep alive heartbeats, re-registering peer") keepAlive = nil return true } - lastKeepAlive = time.Now() + lastKeepAlive = clock.Now() case <-done: ctx, cancel := context.WithTimeout(context.Background(), etcdTimeout) if _, err := e.conf.Client.Delete(ctx, instanceKey); err != nil { @@ -321,12 +340,11 @@ func (e *EtcdPool) Close() { func (e *EtcdPool) callOnUpdate() { var peers []PeerInfo - for k := range e.peers { - if k == e.conf.AdvertiseAddress { - peers = append(peers, PeerInfo{Address: k, IsOwner: true}) - } else { - peers = append(peers, PeerInfo{Address: k}) + for _, p := range e.peers { + if p.GRPCAddress == e.conf.AdvertiseAddress { + p.IsOwner = true } + peers = append(peers, p) } e.conf.OnUpdate(peers) diff --git a/example.conf b/example.conf index 72b3b355..b470a8d5 100644 --- a/example.conf +++ b/example.conf @@ -3,16 +3,27 @@ ############################ # The address GRPC requests will listen on -GUBER_GRPC_ADDRESS=0.0.0.0:81 +GUBER_GRPC_ADDRESS=0.0.0.0:9990 # The address HTTP requests will listen on -GUBER_HTTP_ADDRESS=0.0.0.0:80 +GUBER_HTTP_ADDRESS=0.0.0.0:9980 + +# The address gubernator peers will connect too. +# +# Should be the same as GUBER_GRPC_ADDRESS unless you are running behind a NAT +# or running in a docker container without host networking. +# +# If unset, will default to the hostname or if that fails will attempt +# to guess at a non loopback interface +# GUBER_ADVERTISE_ADDRESS=localhost:81 # Max size of the cache; This is the cache that holds # all the rate limits. The cache size will never grow # beyond this size. GUBER_CACHE_SIZE=50000 +# The name of the datacenter this gubernator instance is in. 
+# GUBER_DATA_CENTER=datacenter1 ############################ # Behavior Config @@ -36,9 +47,30 @@ GUBER_CACHE_SIZE=50000 # How long a node will wait before sending a batch of GLOBAL updates to a peer #GUBER_GLOBAL_SYNC_WAIT=500ns +############################ +# Peer Discovery Type +############################ +# Which type of peer discovery gubernator will use ('member-list', 'etcd', 'k8s') +# GUBER_PEER_DISCOVERY_TYPE=member-list + +############################ +# Member-List Config (GUBER_PEER_DISCOVERY_TYPE=member-list) +############################ + +# The address peers will connect too. Defaults to GUBER_ADVERTISE_ADDRESS +# GUBER_MEMBERLIST_ADVERTISE_ADDRESS=localhost:81 + +# The address the member list will listen to in order to discover other list members. +# This should be a different port than GUBER_ADVERTISE_ADDRESS +# GUBER_MEMBERLIST_ADDRESS=localhost:7946 + +# This is an initial list or a single domain name that 'member-list' will connect to in order to +# begin discovering other peers. +# GUBER_MEMBERLIST_KNOWN_NODES=peer1:7946,peer2:7946,peer3:7946 +# GUBER_MEMBERLIST_KNOWN_NODES=memberlist.example.com ############################ -# Kubernetes Config +# Kubernetes Config (GUBER_PEER_DISCOVERY_TYPE=k8s) ############################ # The namespace the gubernator instances were deployed into @@ -56,16 +88,14 @@ GUBER_CACHE_SIZE=50000 ############################ -# Etcd Config +# Etcd Config (GUBER_PEER_DISCOVERY_TYPE=etcd) ############################ # A Comma separate list of etcd nodes -GUBER_ETCD_ENDPOINTS=localhost:2379 +# GUBER_ETCD_ENDPOINTS=localhost:2379 -# The address peers will connect too -# Should be the same as grpc-listen-address unless you are running behind -# a NAT or running in a docker container without host networking -GUBER_ETCD_ADVERTISE_ADDRESS=localhost:81 +# The address peers will connect too. Defaults to GUBER_ADVERTISE_ADDRESS +# GUBER_ETCD_ADVERTISE_ADDRESS=localhost:81 # The prefix gubernator will use to register peers under in etcd #GUBER_ETCD_KEY_PREFIX=/gubernator-peers @@ -73,6 +103,9 @@ GUBER_ETCD_ADVERTISE_ADDRESS=localhost:81 # How long etcd client will wait for a response when initial dialing a node #GUBER_ETCD_DIAL_TIMEOUT=5s +# The name of the datacenter this gubernator instance is in. 
+# GUBER_ETCD_DATA_CENTER=datacenter1 + # Authentication #GUBER_ETCD_USER= #GUBER_ETCD_PASSWORD= @@ -109,5 +142,5 @@ GUBER_ETCD_ADVERTISE_ADDRESS=localhost:81 # GUBER_PEER_PICKER_HASH=fnv1a # Choose the number of replications -# GUBER_REPLICATED_HASH_REPLICAS=1 +# GUBER_REPLICATED_HASH_REPLICAS=512 diff --git a/functional_test.go b/functional_test.go index 04f28424..3e9dc940 100644 --- a/functional_test.go +++ b/functional_test.go @@ -19,12 +19,14 @@ package gubernator_test import ( "context" "fmt" + "net/http" "os" "testing" - "time" guber "github.com/mailgun/gubernator" "github.com/mailgun/gubernator/cluster" + "github.com/mailgun/holster/v3/clock" + "github.com/mailgun/holster/v3/testutil" "github.com/prometheus/client_golang/prometheus" dto "github.com/prometheus/client_model/go" "github.com/stretchr/testify/assert" @@ -33,13 +35,13 @@ import ( // Setup and shutdown the mock gubernator cluster for the entire test suite func TestMain(m *testing.M) { - if err := cluster.StartWith([]string{ - "127.0.0.1:9990", - "127.0.0.1:9991", - "127.0.0.1:9992", - "127.0.0.1:9993", - "127.0.0.1:9994", - "127.0.0.1:9995", + if err := cluster.StartWith([]guber.PeerInfo{ + {GRPCAddress: "127.0.0.1:9990", HTTPAddress: "127.0.0.1:9980"}, + {GRPCAddress: "127.0.0.1:9991", HTTPAddress: "127.0.0.1:9981"}, + {GRPCAddress: "127.0.0.1:9992", HTTPAddress: "127.0.0.1:9982"}, + {GRPCAddress: "127.0.0.1:9993", HTTPAddress: "127.0.0.1:9983"}, + {GRPCAddress: "127.0.0.1:9994", HTTPAddress: "127.0.0.1:9984"}, + {GRPCAddress: "127.0.0.1:9995", HTTPAddress: "127.0.0.1:9985"}, }); err != nil { fmt.Println(err) os.Exit(1) @@ -49,7 +51,7 @@ func TestMain(m *testing.M) { } func TestOverTheLimit(t *testing.T) { - client, errs := guber.DialV1Server(cluster.GetRandomPeer().Address) + client, errs := guber.DialV1Server(cluster.GetRandomPeer().GRPCAddress) require.Nil(t, errs) tests := []struct { @@ -96,28 +98,30 @@ func TestOverTheLimit(t *testing.T) { } func TestTokenBucket(t *testing.T) { - client, errs := guber.DialV1Server(cluster.GetRandomPeer().Address) + defer clock.Freeze(clock.Now()).Unfreeze() + + client, errs := guber.DialV1Server(cluster.GetRandomPeer().GRPCAddress) require.Nil(t, errs) tests := []struct { Remaining int64 Status guber.Status - Sleep time.Duration + Sleep clock.Duration }{ { Remaining: 1, Status: guber.Status_UNDER_LIMIT, - Sleep: time.Duration(0), + Sleep: clock.Duration(0), }, { Remaining: 0, Status: guber.Status_UNDER_LIMIT, - Sleep: time.Duration(time.Millisecond * 5), + Sleep: clock.Duration(clock.Millisecond * 100), }, { Remaining: 1, Status: guber.Status_UNDER_LIMIT, - Sleep: time.Duration(0), + Sleep: clock.Duration(0), }, } @@ -143,43 +147,45 @@ func TestTokenBucket(t *testing.T) { assert.Equal(t, test.Remaining, rl.Remaining) assert.Equal(t, int64(2), rl.Limit) assert.True(t, rl.ResetTime != 0) - time.Sleep(test.Sleep) + clock.Advance(test.Sleep) } } func TestLeakyBucket(t *testing.T) { - client, errs := guber.DialV1Server(cluster.GetRandomPeer().Address) + defer clock.Freeze(clock.Now()).Unfreeze() + + client, errs := guber.DialV1Server(cluster.PeerAt(0).GRPCAddress) require.Nil(t, errs) tests := []struct { Hits int64 Remaining int64 Status guber.Status - Sleep time.Duration + Sleep clock.Duration }{ { Hits: 5, Remaining: 0, Status: guber.Status_UNDER_LIMIT, - Sleep: time.Duration(0), + Sleep: clock.Duration(0), }, { Hits: 1, Remaining: 0, Status: guber.Status_OVER_LIMIT, - Sleep: time.Duration(time.Millisecond * 10), + Sleep: clock.Millisecond * 100, }, { Hits: 1, Remaining: 0, Status: 
guber.Status_UNDER_LIMIT, - Sleep: time.Duration(time.Millisecond * 20), + Sleep: clock.Millisecond * 400, }, { Hits: 1, - Remaining: 1, + Remaining: 4, Status: guber.Status_UNDER_LIMIT, - Sleep: time.Duration(0), + Sleep: clock.Duration(0), }, } @@ -190,13 +196,14 @@ func TestLeakyBucket(t *testing.T) { Name: "test_leaky_bucket", UniqueKey: "account:1234", Algorithm: guber.Algorithm_LEAKY_BUCKET, - Duration: guber.Millisecond * 50, + Duration: guber.Millisecond * 300, Hits: test.Hits, Limit: 5, }, }, }) - require.Nil(t, err) + clock.Freeze(clock.Now()) + require.NoError(t, err) rl := resp.Responses[0] @@ -204,12 +211,12 @@ func TestLeakyBucket(t *testing.T) { assert.Equal(t, test.Remaining, rl.Remaining, i) assert.Equal(t, int64(5), rl.Limit, i) assert.True(t, rl.ResetTime != 0) - time.Sleep(test.Sleep) + clock.Advance(test.Sleep) } } func TestMissingFields(t *testing.T) { - client, errs := guber.DialV1Server(cluster.GetRandomPeer().Address) + client, errs := guber.DialV1Server(cluster.GetRandomPeer().GRPCAddress) require.Nil(t, errs) tests := []struct { @@ -272,15 +279,13 @@ func TestMissingFields(t *testing.T) { } func TestGlobalRateLimits(t *testing.T) { - const clientInstance = 1 - peer := cluster.PeerAt(clientInstance) - client, errs := guber.DialV1Server(peer.Address) - require.Nil(t, errs) - - ctx, cancel := context.WithTimeout(context.Background(), time.Second*3) - defer cancel() + peer := cluster.PeerAt(0).GRPCAddress + client, errs := guber.DialV1Server(peer) + require.NoError(t, errs) sendHit := func(status guber.Status, remain int64, i int) string { + ctx, cancel := context.WithTimeout(context.Background(), clock.Second*5) + defer cancel() resp, err := client.GetRateLimits(ctx, &guber.GetRateLimitsReq{ Requests: []*guber.RateLimitReq{ { @@ -294,7 +299,7 @@ func TestGlobalRateLimits(t *testing.T) { }, }, }) - require.Nil(t, err, i) + require.NoError(t, err, i) assert.Equal(t, "", resp.Responses[0].Error, i) assert.Equal(t, status, resp.Responses[0].Status, i) assert.Equal(t, remain, resp.Responses[0].Remaining, i) @@ -316,36 +321,31 @@ func TestGlobalRateLimits(t *testing.T) { // Our second should be processed as if we own it since the async forward hasn't occurred yet sendHit(guber.Status_UNDER_LIMIT, 3, 2) - time.Sleep(time.Second) - - // After sleeping this response should be from the updated async call from our owner. 
Notice the - // remaining is still 3 as the hit is queued for update to the owner - canonicalHost := sendHit(guber.Status_UNDER_LIMIT, 3, 3) - - canonicalInstance := cluster.InstanceForHost(canonicalHost) - - // Inspect our metrics, ensure they collected the counts we expected during this test - instance := cluster.InstanceForHost(peer.Address) - - metricCh := make(chan prometheus.Metric, 5) - instance.Guber.Collect(metricCh) - - buf := dto.Metric{} - m := <-metricCh // Async metric - assert.Nil(t, m.Write(&buf)) - assert.Equal(t, uint64(1), *buf.Histogram.SampleCount) - - metricCh = make(chan prometheus.Metric, 5) - canonicalInstance.Guber.Collect(metricCh) - - m = <-metricCh // Async metric - m = <-metricCh // Broadcast metric - assert.Nil(t, m.Write(&buf)) - assert.Equal(t, uint64(1), *buf.Histogram.SampleCount) + testutil.UntilPass(t, 20, clock.Millisecond*200, func(t testutil.TestingT) { + // Inspect our metrics, ensure they collected the counts we expected during this test + d := cluster.DaemonAt(0) + metricCh := make(chan prometheus.Metric, 5) + d.V1Server.Collect(metricCh) + + buf := dto.Metric{} + m := <-metricCh // Async metric + assert.Nil(t, m.Write(&buf)) + assert.Equal(t, uint64(2), *buf.Histogram.SampleCount) + + // V1Instance 2 should be the owner of our global rate limit + d = cluster.DaemonAt(2) + metricCh = make(chan prometheus.Metric, 5) + d.V1Server.Collect(metricCh) + + m = <-metricCh // Async metric + m = <-metricCh // Broadcast metric + assert.Nil(t, m.Write(&buf)) + assert.Equal(t, uint64(2), *buf.Histogram.SampleCount) + }) } func TestChangeLimit(t *testing.T) { - client, errs := guber.DialV1Server(cluster.GetRandomPeer().Address) + client, errs := guber.DialV1Server(cluster.GetRandomPeer().GRPCAddress) require.Nil(t, errs) tests := []struct { @@ -440,7 +440,7 @@ func TestChangeLimit(t *testing.T) { } func TestResetRemaining(t *testing.T) { - client, errs := guber.DialV1Server(cluster.GetRandomPeer().Address) + client, errs := guber.DialV1Server(cluster.GetRandomPeer().GRPCAddress) require.Nil(t, errs) tests := []struct { @@ -512,14 +512,14 @@ func TestResetRemaining(t *testing.T) { } func TestHealthCheck(t *testing.T) { - client, errs := guber.DialV1Server(cluster.InstanceAt(0).Address) - require.Nil(t, errs) + client, err := guber.DialV1Server(cluster.DaemonAt(0).GRPCListener.Addr().String()) + require.NoError(t, err) // Check that the cluster is healthy to start with healthResp, err := client.HealthCheck(context.Background(), &guber.HealthCheckReq{}) - require.Nil(t, err) + require.NoError(t, err) - assert.Equal(t, "healthy", healthResp.GetStatus()) + require.Equal(t, "healthy", healthResp.GetStatus()) // Create a global rate limit that will need to be sent to all peers in the cluster _, err = client.GetRateLimits(context.Background(), &guber.GetRateLimitsReq{ @@ -528,7 +528,7 @@ func TestHealthCheck(t *testing.T) { Name: "test_health_check", UniqueKey: "account:12345", Algorithm: guber.Algorithm_TOKEN_BUCKET, - Behavior: guber.Behavior_GLOBAL, + Behavior: guber.Behavior_BATCHING, Duration: guber.Second * 3, Hits: 1, Limit: 5, @@ -538,13 +538,14 @@ func TestHealthCheck(t *testing.T) { require.Nil(t, err) // Stop the rest of the cluster to ensure errors occur on our instance and - // collect addresses to restart the stopped instances after the test completes - var addresses []string - for i := 1; i < cluster.NumOfInstances(); i++ { - addresses = append(addresses, cluster.InstanceAt(i).Address) - cluster.StopInstanceAt(i) + // collect daemons to restart the 
stopped peers after the test completes + var daemons []*guber.Daemon + for i := 1; i < cluster.NumOfDaemons(); i++ { + d := cluster.DaemonAt(i) + require.NotNil(t, d) + d.Close() + daemons = append(daemons, d) } - time.Sleep(time.Second) // Hit the global rate limit again this time causing a connection error _, err = client.GetRateLimits(context.Background(), &guber.GetRateLimitsReq{ @@ -562,28 +563,33 @@ func TestHealthCheck(t *testing.T) { }) require.Nil(t, err) - // Check the health again to get back the connection error - healthResp, err = client.HealthCheck(context.Background(), &guber.HealthCheckReq{}) - require.Nil(t, err) + testutil.UntilPass(t, 20, clock.Millisecond*300, func(t testutil.TestingT) { + // Check the health again to get back the connection error + healthResp, err = client.HealthCheck(context.Background(), &guber.HealthCheckReq{}) + if assert.Nil(t, err) { + return + } - assert.Equal(t, "unhealthy", healthResp.GetStatus()) - assert.Contains(t, healthResp.GetMessage(), "connect: connection refused") + assert.Equal(t, "unhealthy", healthResp.GetStatus()) + assert.Contains(t, healthResp.GetMessage(), "connect: connection refused") + }) // Restart stopped instances - for i := 1; i < cluster.NumOfInstances(); i++ { - cluster.StartInstance(addresses[i-1], cluster.GetDefaultConfig()) - } + ctx, cancel := context.WithTimeout(context.Background(), clock.Second*15) + defer cancel() + cluster.Restart(ctx) + } func TestLeakyBucketDivBug(t *testing.T) { - client, errs := guber.DialV1Server(cluster.GetRandomPeer().Address) - require.Nil(t, errs) + client, err := guber.DialV1Server(cluster.GetRandomPeer().GRPCAddress) + require.NoError(t, err) resp, err := client.GetRateLimits(context.Background(), &guber.GetRateLimitsReq{ Requests: []*guber.RateLimitReq{ { - Name: "test_leaky_bucket", - UniqueKey: "account:1234", + Name: "test_leaky_bucket_div", + UniqueKey: "account:12345", Algorithm: guber.Algorithm_LEAKY_BUCKET, Duration: guber.Millisecond * 1000, Hits: 1, @@ -591,17 +597,18 @@ func TestLeakyBucketDivBug(t *testing.T) { }, }, }) + require.NoError(t, err) + assert.Equal(t, "", resp.Responses[0].Error) assert.Equal(t, guber.Status_UNDER_LIMIT, resp.Responses[0].Status) assert.Equal(t, int64(1999), resp.Responses[0].Remaining) assert.Equal(t, int64(2000), resp.Responses[0].Limit) - require.Nil(t, err) // Should result in a rate of 0.5 resp, err = client.GetRateLimits(context.Background(), &guber.GetRateLimitsReq{ Requests: []*guber.RateLimitReq{ { - Name: "test_leaky_bucket", - UniqueKey: "account:1234", + Name: "test_leaky_bucket_div", + UniqueKey: "account:12345", Algorithm: guber.Algorithm_LEAKY_BUCKET, Duration: guber.Millisecond * 1000, Hits: 100, @@ -609,9 +616,15 @@ func TestLeakyBucketDivBug(t *testing.T) { }, }, }) - require.Nil(t, err) + require.NoError(t, err) assert.Equal(t, int64(1900), resp.Responses[0].Remaining) assert.Equal(t, int64(2000), resp.Responses[0].Limit) } +func TestGRPCGateway(t *testing.T) { + resp, err := http.DefaultClient.Get("http://" + cluster.GetRandomPeer().HTTPAddress + "/v1/HealthCheck") + require.NoError(t, err) + assert.Equal(t, http.StatusOK, resp.StatusCode) +} + // TODO: Add a test for sending no rate limits RateLimitReqList.RateLimits = nil diff --git a/global.go b/global.go index 11eb99d1..ae2887ce 100644 --- a/global.go +++ b/global.go @@ -18,8 +18,8 @@ package gubernator import ( "context" - "time" + "github.com/mailgun/holster/v3/clock" "github.com/mailgun/holster/v3/syncutil" "github.com/prometheus/client_golang/prometheus" 
"github.com/sirupsen/logrus" @@ -32,16 +32,16 @@ type globalManager struct { broadcastQueue chan *RateLimitReq wg syncutil.WaitGroup conf BehaviorConfig - log *logrus.Entry - instance *Instance + log logrus.FieldLogger + instance *V1Instance asyncMetrics prometheus.Histogram broadcastMetrics prometheus.Histogram } -func newGlobalManager(conf BehaviorConfig, instance *Instance) *globalManager { +func newGlobalManager(conf BehaviorConfig, instance *V1Instance) *globalManager { gm := globalManager{ - log: log.WithField("category", "global-manager"), + log: instance.log, asyncMetrics: prometheus.NewHistogram(prometheus.HistogramOpts{ Name: "async_durations", Help: "The duration of GLOBAL async sends in seconds.", @@ -119,7 +119,7 @@ func (gm *globalManager) sendHits(hits map[string]*RateLimitReq) { req GetPeerRateLimitsReq } peerRequests := make(map[string]*pair) - start := time.Now() + start := clock.Now() // Assign each request to a peer for _, r := range hits { @@ -129,11 +129,11 @@ func (gm *globalManager) sendHits(hits map[string]*RateLimitReq) { continue } - p, ok := peerRequests[peer.info.Address] + p, ok := peerRequests[peer.info.GRPCAddress] if ok { p.req.Requests = append(p.req.Requests, r) } else { - peerRequests[peer.info.Address] = &pair{ + peerRequests[peer.info.GRPCAddress] = &pair{ client: peer, req: GetPeerRateLimitsReq{Requests: []*RateLimitReq{r}}, } @@ -148,11 +148,11 @@ func (gm *globalManager) sendHits(hits map[string]*RateLimitReq) { if err != nil { gm.log.WithError(err). - Errorf("error sending global hits to '%s'", p.client.info.Address) + Errorf("error sending global hits to '%s'", p.client.info.GRPCAddress) continue } } - gm.asyncMetrics.Observe(time.Since(start).Seconds()) + gm.asyncMetrics.Observe(clock.Since(start).Seconds()) } // runBroadcasts collects status changes for global rate limits and broadcasts the changes to each peer in the cluster. 
@@ -167,7 +167,7 @@ func (gm *globalManager) runBroadcasts() { // Send the hits if we reached our batch limit if len(updates) == gm.conf.GlobalBatchLimit { - gm.updatePeers(updates) + gm.broadcastPeers(updates) updates = make(map[string]*RateLimitReq) return true } @@ -180,7 +180,7 @@ func (gm *globalManager) runBroadcasts() { case <-interval.C: if len(updates) != 0 { - gm.updatePeers(updates) + gm.broadcastPeers(updates) updates = make(map[string]*RateLimitReq) } case <-done: @@ -190,10 +190,10 @@ func (gm *globalManager) runBroadcasts() { }) } -// updatePeers broadcasts global rate limit statuses to all other peers -func (gm *globalManager) updatePeers(updates map[string]*RateLimitReq) { +// broadcastPeers broadcasts global rate limit statuses to all other peers +func (gm *globalManager) broadcastPeers(updates map[string]*RateLimitReq) { var req UpdatePeerGlobalsReq - start := time.Now() + start := clock.Now() for _, r := range updates { // Copy the original since we removing the GLOBAL behavior @@ -205,7 +205,7 @@ func (gm *globalManager) updatePeers(updates map[string]*RateLimitReq) { status, err := gm.instance.getRateLimit(&rl) if err != nil { - gm.log.WithError(err).Errorf("while sending global updates to peers for: '%s'", rl.HashKey()) + gm.log.WithError(err).Errorf("while broadcasting update to peers for: '%s'", rl.HashKey()) continue } // Build an UpdatePeerGlobalsReq @@ -229,11 +229,11 @@ func (gm *globalManager) updatePeers(updates map[string]*RateLimitReq) { if err != nil { // Skip peers that are not in a ready state if !IsNotReady(err) { - gm.log.WithError(err).Errorf("error sending global updates to '%s'", peer.info.Address) + gm.log.WithError(err).Errorf("while broadcasting global updates to '%s'", peer.info.GRPCAddress) } continue } } - gm.broadcastMetrics.Observe(time.Since(start).Seconds()) + gm.broadcastMetrics.Observe(clock.Since(start).Seconds()) } diff --git a/go.mod b/go.mod index 0a730953..ec20134b 100644 --- a/go.mod +++ b/go.mod @@ -1,17 +1,18 @@ module github.com/mailgun/gubernator +go 1.14 + require ( github.com/coreos/etcd v3.3.15+incompatible github.com/davecgh/go-spew v1.1.1 - github.com/fatih/structs v1.1.0 // indirect github.com/golang/protobuf v1.3.2 github.com/grpc-ecosystem/grpc-gateway v1.11.1 github.com/hashicorp/memberlist v0.2.0 - github.com/mailgun/holster v3.0.0+incompatible - github.com/mailgun/holster/v3 v3.8.1 - github.com/pkg/errors v0.8.1 + github.com/mailgun/holster/v3 v3.14.0 + github.com/pkg/errors v0.9.1 github.com/prometheus/client_golang v1.1.0 github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4 + github.com/prometheus/common v0.6.0 github.com/segmentio/fasthash v1.0.2 github.com/sirupsen/logrus v1.4.2 github.com/stretchr/testify v1.4.0 @@ -23,4 +24,3 @@ require ( k8s.io/klog v0.3.1 ) -go 1.13 diff --git a/go.sum b/go.sum index 9ff515d3..75d1ecf9 100644 --- a/go.sum +++ b/go.sum @@ -36,8 +36,6 @@ github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZm github.com/docker/spdystream v0.0.0-20160310174837-449fdfce4d96/go.mod h1:Qh8CwZgvJUkLughtfhJv5dyTYa91l1fOUCrgjqmcifM= github.com/elazarl/goproxy v0.0.0-20170405201442-c4fc26588b6e/go.mod h1:/Zj4wYkgs4iZTTu3o/KG3Itv/qCCa8VVMlb3i9OVuzc= github.com/evanphx/json-patch v0.0.0-20190203023257-5858425f7550/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= -github.com/fatih/structs v1.1.0 h1:Q7juDM0QtcnhCpeyLGQKyg4TOIghuNXrkL32pHAUMxo= -github.com/fatih/structs v1.1.0/go.mod h1:9NiDSp5zOcgEDl+j00MP/WkGVPOlPRLejGD8Ga6PJ7M= github.com/fsnotify/fsnotify 
v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= github.com/ghodss/yaml v1.0.0 h1:wQHKEahhL6wmXdzwWG11gIVCkOv05bNOh+Rxn0yngAk= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= @@ -127,10 +125,8 @@ github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORN github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= -github.com/mailgun/holster v3.0.0+incompatible h1:bpt8ZCwLBrzjqfBZ5mobNb2NjesNeDHmsOO++Ek9Swc= -github.com/mailgun/holster v3.0.0+incompatible/go.mod h1:crzolGx27RP/IBT/BnPQiYBB9igmAFHGRrz0zlMP0b0= -github.com/mailgun/holster/v3 v3.8.1 h1:Z9D3F1ShnxGUlofougjSht08OpIiQKtryBjExB+uz9Q= -github.com/mailgun/holster/v3 v3.8.1/go.mod h1:rNcFlhMTxFDa1dnQC4sUqI71IpAa9/aPeU6w8IGF3aQ= +github.com/mailgun/holster/v3 v3.14.0 h1:SgDJqxLiHFpOQ5YIn00zZQo79k142DAFvAgVlFKiUvQ= +github.com/mailgun/holster/v3 v3.14.0/go.mod h1:K8liVWqma64dBz3wY3YOf+biRYTZOEESaizryvqJYnI= github.com/matttproud/golang_protobuf_extensions v1.0.1 h1:4hp9jkHxhMHkqkrB3Ix0jegS5sx/RkqARlsWZ6pIwiU= github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= github.com/miekg/dns v1.1.26 h1:gPxPSwALAeHJSjarOs00QjVdV9QoBvc1D2ujQUr5BzU= @@ -149,8 +145,8 @@ github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c h1:Lgl0gzECD8GnQ5 github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= github.com/peterbourgon/diskv v2.0.1+incompatible/go.mod h1:uqqh8zWWbv1HBMNONnaR/tNboyR3/BZd58JJSHlUSCU= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= -github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I= -github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= diff --git a/prometheus.go b/grpc_stats.go similarity index 77% rename from prometheus.go rename to grpc_stats.go index ccccda00..9ce786b2 100644 --- a/prometheus.go +++ b/grpc_stats.go @@ -18,16 +18,15 @@ package gubernator import ( "context" - "time" + "github.com/mailgun/holster/v3/clock" "github.com/mailgun/holster/v3/syncutil" "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promauto" "google.golang.org/grpc/stats" ) type GRPCStats struct { - Duration time.Duration + Duration clock.Duration Method string Failed int64 Success int64 @@ -39,22 +38,21 @@ var statsContextKey = contextKey{} // Implements the Prometheus collector interface. 
Such that when the /metrics handler is // called this collector pulls all the stats from -type Collector struct { +type GRPCStatsHandler struct { reqCh chan *GRPCStats wg syncutil.WaitGroup - // Metrics collectors grpcRequestCount *prometheus.CounterVec grpcRequestDuration *prometheus.HistogramVec } -func NewGRPCStatsHandler() *Collector { - c := &Collector{ - grpcRequestCount: promauto.NewCounterVec(prometheus.CounterOpts{ +func NewGRPCStatsHandler() *GRPCStatsHandler { + c := &GRPCStatsHandler{ + grpcRequestCount: prometheus.NewCounterVec(prometheus.CounterOpts{ Name: "grpc_request_counts", Help: "GRPC requests by status."}, []string{"status", "method"}), - grpcRequestDuration: promauto.NewHistogramVec(prometheus.HistogramOpts{ + grpcRequestDuration: prometheus.NewHistogramVec(prometheus.HistogramOpts{ Name: "grpc_request_duration_milliseconds", Help: "GRPC request durations in milliseconds.", }, []string{"method"}), @@ -63,7 +61,7 @@ func NewGRPCStatsHandler() *Collector { return c } -func (c *Collector) run() { +func (c *GRPCStatsHandler) run() { c.reqCh = make(chan *GRPCStats, 10000) c.wg.Until(func(done chan struct{}) bool { @@ -98,11 +96,21 @@ func (c *Collector) run() { }) } -func (c *Collector) Close() { +func (c *GRPCStatsHandler) Describe(ch chan<- *prometheus.Desc) { + c.grpcRequestCount.Describe(ch) + c.grpcRequestDuration.Describe(ch) +} + +func (c *GRPCStatsHandler) Collect(ch chan<- prometheus.Metric) { + c.grpcRequestCount.Collect(ch) + c.grpcRequestDuration.Collect(ch) +} + +func (c *GRPCStatsHandler) Close() { c.wg.Stop() } -func (c *Collector) HandleRPC(ctx context.Context, s stats.RPCStats) { +func (c *GRPCStatsHandler) HandleRPC(ctx context.Context, s stats.RPCStats) { rs := StatsFromContext(ctx) if rs == nil { return @@ -127,13 +135,13 @@ func (c *Collector) HandleRPC(ctx context.Context, s stats.RPCStats) { } } -func (c *Collector) HandleConn(ctx context.Context, s stats.ConnStats) {} +func (c *GRPCStatsHandler) HandleConn(ctx context.Context, s stats.ConnStats) {} -func (c *Collector) TagConn(ctx context.Context, _ *stats.ConnTagInfo) context.Context { +func (c *GRPCStatsHandler) TagConn(ctx context.Context, _ *stats.ConnTagInfo) context.Context { return ctx } -func (c *Collector) TagRPC(ctx context.Context, tagInfo *stats.RPCTagInfo) context.Context { +func (c *GRPCStatsHandler) TagRPC(ctx context.Context, tagInfo *stats.RPCTagInfo) context.Context { return ContextWithStats(ctx, &GRPCStats{Method: tagInfo.FullMethodName}) } diff --git a/gubernator.go b/gubernator.go index e5667669..4c2185e3 100644 --- a/gubernator.go +++ b/gubernator.go @@ -22,6 +22,7 @@ import ( "strings" "sync" + "github.com/mailgun/holster/v3/setter" "github.com/mailgun/holster/v3/syncutil" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" @@ -36,30 +37,32 @@ const ( UnHealthy = "unhealthy" ) -var log *logrus.Entry - -type Instance struct { +type V1Instance struct { health HealthCheckResp global *globalManager mutliRegion *mutliRegionManager peerMutex sync.RWMutex + log logrus.FieldLogger conf Config isClosed bool } -func New(conf Config) (*Instance, error) { +// NewV1Instance instantiate a single instance of a gubernator peer and registers this +// instance with the provided GRPCServer. 
+func NewV1Instance(conf Config) (*V1Instance, error) { if conf.GRPCServer == nil { return nil, errors.New("GRPCServer instance is required") } - log = logrus.WithField("category", "gubernator") if err := conf.SetDefaults(); err != nil { return nil, err } - s := Instance{ + s := V1Instance{ + log: conf.Logger, conf: conf, } + setter.SetDefault(&s.log, logrus.WithField("category", "gubernator")) s.global = newGlobalManager(conf.Behaviors, &s) s.mutliRegion = newMultiRegionManager(conf.Behaviors, &s) @@ -83,7 +86,7 @@ func New(conf Config) (*Instance, error) { return &s, nil } -func (s *Instance) Close() error { +func (s *V1Instance) Close() error { if s.isClosed { return nil } @@ -106,9 +109,8 @@ func (s *Instance) Close() error { // GetRateLimits is the public interface used by clients to request rate limits from the system. If the // rate limit `Name` and `UniqueKey` is not owned by this instance then we forward the request to the // peer that does. -func (s *Instance) GetRateLimits(ctx context.Context, r *GetRateLimitsReq) (*GetRateLimitsResp, error) { +func (s *V1Instance) GetRateLimits(ctx context.Context, r *GetRateLimitsReq) (*GetRateLimitsResp, error) { var resp GetRateLimitsResp - if len(r.Requests) > maxBatchSize { return nil, status.Errorf(codes.OutOfRange, "Requests.RateLimits list too large; max size is '%d'", maxBatchSize) @@ -181,7 +183,7 @@ func (s *Instance) GetRateLimits(ctx context.Context, r *GetRateLimitsReq) (*Get } // Inform the client of the owner key of the key - inOut.Out.Metadata = map[string]string{"owner": peer.info.Address} + inOut.Out.Metadata = map[string]string{"owner": peer.info.GRPCAddress} out <- inOut return nil @@ -200,7 +202,7 @@ func (s *Instance) GetRateLimits(ctx context.Context, r *GetRateLimitsReq) (*Get } // Inform the client of the owner key of the key - inOut.Out.Metadata = map[string]string{"owner": peer.info.Address} + inOut.Out.Metadata = map[string]string{"owner": peer.info.GRPCAddress} } out <- inOut @@ -222,7 +224,7 @@ func (s *Instance) GetRateLimits(ctx context.Context, r *GetRateLimitsReq) (*Get // getGlobalRateLimit handles rate limits that are marked as `Behavior = GLOBAL`. Rate limit responses // are returned from the local cache and the hits are queued to be sent to the owning peer. -func (s *Instance) getGlobalRateLimit(req *RateLimitReq) (*RateLimitResp, error) { +func (s *V1Instance) getGlobalRateLimit(req *RateLimitReq) (*RateLimitResp, error) { // Queue the hit for async update s.global.QueueHit(req) @@ -247,7 +249,7 @@ func (s *Instance) getGlobalRateLimit(req *RateLimitReq) (*RateLimitResp, error) // UpdatePeerGlobals updates the local cache with a list of global rate limits. This method should only // be called by a peer who is the owner of a global rate limit. -func (s *Instance) UpdatePeerGlobals(ctx context.Context, r *UpdatePeerGlobalsReq) (*UpdatePeerGlobalsResp, error) { +func (s *V1Instance) UpdatePeerGlobals(ctx context.Context, r *UpdatePeerGlobalsReq) (*UpdatePeerGlobalsResp, error) { s.conf.Cache.Lock() defer s.conf.Cache.Unlock() @@ -263,7 +265,7 @@ func (s *Instance) UpdatePeerGlobals(ctx context.Context, r *UpdatePeerGlobalsRe } // GetPeerRateLimits is called by other peers to get the rate limits owned by this peer. 
-func (s *Instance) GetPeerRateLimits(ctx context.Context, r *GetPeerRateLimitsReq) (*GetPeerRateLimitsResp, error) { +func (s *V1Instance) GetPeerRateLimits(ctx context.Context, r *GetPeerRateLimitsReq) (*GetPeerRateLimitsResp, error) { var resp GetPeerRateLimitsResp if len(r.Requests) > maxBatchSize { @@ -283,7 +285,7 @@ func (s *Instance) GetPeerRateLimits(ctx context.Context, r *GetPeerRateLimitsRe } // HealthCheck Returns the health of our instance. -func (s *Instance) HealthCheck(ctx context.Context, r *HealthCheckReq) (*HealthCheckResp, error) { +func (s *V1Instance) HealthCheck(ctx context.Context, r *HealthCheckReq) (*HealthCheckResp, error) { var errs []string s.peerMutex.RLock() @@ -323,7 +325,7 @@ func (s *Instance) HealthCheck(ctx context.Context, r *HealthCheckReq) (*HealthC return &s.health, nil } -func (s *Instance) getRateLimit(r *RateLimitReq) (*RateLimitResp, error) { +func (s *V1Instance) getRateLimit(r *RateLimitReq) (*RateLimitResp, error) { s.conf.Cache.Lock() defer s.conf.Cache.Unlock() @@ -345,7 +347,7 @@ func (s *Instance) getRateLimit(r *RateLimitReq) (*RateLimitResp, error) { } // SetPeers is called by the implementor to indicate the pool of peers has changed -func (s *Instance) SetPeers(peerInfo []PeerInfo) { +func (s *V1Instance) SetPeers(peerInfo []PeerInfo) { localPicker := s.conf.LocalPicker.New() regionPicker := s.conf.RegionPicker.New() @@ -376,7 +378,7 @@ func (s *Instance) SetPeers(peerInfo []PeerInfo) { s.conf.RegionPicker = regionPicker s.peerMutex.Unlock() - //TODO: This should include the regions peers? log.WithField("peers", peers).Info("Peers updated") + s.log.WithField("peers", peerInfo).Debug("peers updated") // Shutdown any old peers we no longer need ctx, cancel := context.WithTimeout(context.Background(), s.conf.Behaviors.BatchTimeout) @@ -403,7 +405,7 @@ func (s *Instance) SetPeers(peerInfo []PeerInfo) { pc := obj.(*PeerClient) err := pc.Shutdown(ctx) if err != nil { - log.WithError(err).WithField("peer", pc).Error("while shutting down peer") + s.log.WithError(err).WithField("peer", pc).Error("while shutting down peer") } return nil }, p) @@ -411,12 +413,16 @@ func (s *Instance) SetPeers(peerInfo []PeerInfo) { wg.Wait() if len(shutdownPeers) > 0 { - log.WithField("peers", shutdownPeers).Info("Peers shutdown") + var peers []string + for _, p := range shutdownPeers { + peers = append(peers, p.info.GRPCAddress) + } + s.log.WithField("peers", peers).Debug("Peers shutdown") } } // GetPeers returns a peer client for the hash key provided -func (s *Instance) GetPeer(key string) (*PeerClient, error) { +func (s *V1Instance) GetPeer(key string) (*PeerClient, error) { s.peerMutex.RLock() peer, err := s.conf.LocalPicker.Get(key) if err != nil { @@ -427,26 +433,26 @@ func (s *Instance) GetPeer(key string) (*PeerClient, error) { return peer, nil } -func (s *Instance) GetPeerList() []*PeerClient { +func (s *V1Instance) GetPeerList() []*PeerClient { s.peerMutex.RLock() defer s.peerMutex.RUnlock() return s.conf.LocalPicker.Peers() } -func (s *Instance) GetRegionPickers() map[string]PeerPicker { +func (s *V1Instance) GetRegionPickers() map[string]PeerPicker { s.peerMutex.RLock() defer s.peerMutex.RUnlock() return s.conf.RegionPicker.Pickers() } // Describe fetches prometheus metrics to be registered -func (s *Instance) Describe(ch chan<- *prometheus.Desc) { +func (s *V1Instance) Describe(ch chan<- *prometheus.Desc) { ch <- s.global.asyncMetrics.Desc() ch <- s.global.broadcastMetrics.Desc() } // Collect fetches metrics from the server for use by prometheus 
-func (s *Instance) Collect(ch chan<- prometheus.Metric) { +func (s *V1Instance) Collect(ch chan<- prometheus.Metric) { ch <- s.global.asyncMetrics ch <- s.global.broadcastMetrics } diff --git a/hash.go b/hash.go index 74a9a099..c6a6bdda 100644 --- a/hash.go +++ b/hash.go @@ -28,14 +28,16 @@ import ( type HashFunc func(data []byte) uint32 // Implements PeerPicker -type ConsistantHash struct { +// deprecated +type ConsistentHash struct { hashFunc HashFunc peerKeys []int peerMap map[int]*PeerClient } -func NewConsistantHash(fn HashFunc) *ConsistantHash { - ch := &ConsistantHash{ +// deprecated +func NewConsistentHash(fn HashFunc) *ConsistentHash { + ch := &ConsistentHash{ hashFunc: fn, peerMap: make(map[int]*PeerClient), } @@ -46,14 +48,14 @@ func NewConsistantHash(fn HashFunc) *ConsistantHash { return ch } -func (ch *ConsistantHash) New() PeerPicker { - return &ConsistantHash{ +func (ch *ConsistentHash) New() PeerPicker { + return &ConsistentHash{ hashFunc: ch.hashFunc, peerMap: make(map[int]*PeerClient), } } -func (ch *ConsistantHash) Peers() []*PeerClient { +func (ch *ConsistentHash) Peers() []*PeerClient { var results []*PeerClient for _, v := range ch.peerMap { results = append(results, v) @@ -62,7 +64,7 @@ func (ch *ConsistantHash) Peers() []*PeerClient { } // Adds a peer to the hash -func (ch *ConsistantHash) Add(peer *PeerClient) { +func (ch *ConsistentHash) Add(peer *PeerClient) { hash := int(ch.hashFunc(strToBytesUnsafe(peer.info.HashKey()))) ch.peerKeys = append(ch.peerKeys, hash) ch.peerMap[hash] = peer @@ -70,17 +72,17 @@ func (ch *ConsistantHash) Add(peer *PeerClient) { } // Returns number of peers in the picker -func (ch *ConsistantHash) Size() int { +func (ch *ConsistentHash) Size() int { return len(ch.peerKeys) } // Returns the peer by peer info -func (ch *ConsistantHash) GetByPeerInfo(peer PeerInfo) *PeerClient { +func (ch *ConsistentHash) GetByPeerInfo(peer PeerInfo) *PeerClient { return ch.peerMap[int(ch.hashFunc(strToBytesUnsafe(peer.HashKey())))] } // Given a key, return the peer that key is assigned too -func (ch *ConsistantHash) Get(key string) (*PeerClient, error) { +func (ch *ConsistentHash) Get(key string) (*PeerClient, error) { if ch.Size() == 0 { return nil, errors.New("unable to pick a peer; pool is empty") } diff --git a/hash_test.go b/hash_test.go index 02d26c66..624f8afd 100644 --- a/hash_test.go +++ b/hash_test.go @@ -4,8 +4,8 @@ import ( "math/rand" "net" "testing" - "time" + "github.com/mailgun/holster/v3/clock" "github.com/segmentio/fasthash/fnv1" "github.com/segmentio/fasthash/fnv1a" "github.com/stretchr/testify/assert" @@ -21,49 +21,49 @@ func TestConsistantHash(t *testing.T) { "192.168.1.2": hosts[1], "5f46bb53-6c30-49dc-adb4-b7355058adb6": hosts[1], } - hash := NewConsistantHash(nil) + hash := NewConsistentHash(nil) for _, h := range hosts { - hash.Add(&PeerClient{info: PeerInfo{Address: h}}) + hash.Add(&PeerClient{info: PeerInfo{GRPCAddress: h}}) } for input, addr := range cases { t.Run(input, func(t *testing.T) { peer, err := hash.Get(input) assert.Nil(t, err) - assert.Equal(t, addr, peer.info.Address) + assert.Equal(t, addr, peer.info.GRPCAddress) }) } }) t.Run("Size", func(t *testing.T) { - hash := NewConsistantHash(nil) + hash := NewConsistentHash(nil) for _, h := range hosts { - hash.Add(&PeerClient{info: PeerInfo{Address: h}}) + hash.Add(&PeerClient{info: PeerInfo{GRPCAddress: h}}) } assert.Equal(t, len(hosts), hash.Size()) }) t.Run("Host", func(t *testing.T) { - hash := NewConsistantHash(nil) + hash := NewConsistentHash(nil) hostMap := 
map[string]*PeerClient{} for _, h := range hosts { - peer := &PeerClient{info: PeerInfo{Address: h}} + peer := &PeerClient{info: PeerInfo{GRPCAddress: h}} hash.Add(peer) hostMap[h] = peer } for host, peer := range hostMap { - assert.Equal(t, peer, hash.GetByPeerInfo(PeerInfo{Address: host})) + assert.Equal(t, peer, hash.GetByPeerInfo(PeerInfo{GRPCAddress: host})) } }) t.Run("distribution", func(t *testing.T) { const cases = 10000 - rand.Seed(time.Now().Unix()) + rand.Seed(clock.Now().Unix()) strings := make([]string, cases) @@ -81,17 +81,17 @@ func TestConsistantHash(t *testing.T) { for name, hashFunc := range hashFuncs { t.Run(name, func(t *testing.T) { - hash := NewConsistantHash(hashFunc) + hash := NewConsistentHash(hashFunc) hostMap := map[string]int{} for _, h := range hosts { - hash.Add(&PeerClient{info: PeerInfo{Address: h}}) + hash.Add(&PeerClient{info: PeerInfo{GRPCAddress: h}}) hostMap[h] = 0 } for i := range strings { peer, _ := hash.Get(strings[i]) - hostMap[peer.info.Address]++ + hostMap[peer.info.GRPCAddress]++ } for host, a := range hostMap { @@ -117,10 +117,10 @@ func BenchmarkConsistantHash(b *testing.B) { ips[i] = net.IPv4(byte(i>>24), byte(i>>16), byte(i>>8), byte(i)).String() } - hash := NewConsistantHash(hashFunc) + hash := NewConsistentHash(hashFunc) hosts := []string{"a.svc.local", "b.svc.local", "c.svc.local"} for _, h := range hosts { - hash.Add(&PeerClient{info: PeerInfo{Address: h}}) + hash.Add(&PeerClient{info: PeerInfo{GRPCAddress: h}}) } b.ResetTimer() diff --git a/interval.go b/interval.go index 39e9d1da..ad9e4580 100644 --- a/interval.go +++ b/interval.go @@ -20,6 +20,7 @@ import ( "errors" "time" + "github.com/mailgun/holster/v3/clock" "github.com/mailgun/holster/v3/syncutil" ) @@ -33,7 +34,7 @@ type Interval struct { // the `C` channel does not return the current time and // `C` channel will only get a tick after `Next()` has // been called. 
-func NewInterval(d time.Duration) *Interval { +func NewInterval(d clock.Duration) *Interval { i := Interval{ C: make(chan struct{}, 1), in: make(chan struct{}, 1), @@ -42,7 +43,7 @@ func NewInterval(d time.Duration) *Interval { return &i } -func (i *Interval) run(d time.Duration) { +func (i *Interval) run(d clock.Duration) { i.wg.Until(func(done chan struct{}) bool { select { case <-i.in: @@ -78,7 +79,7 @@ const ( ) // GregorianDuration returns the entire duration of the Gregorian interval -func GregorianDuration(now time.Time, d int64) (int64, error) { +func GregorianDuration(now clock.Time, d int64) (int64, error) { switch d { case GregorianMinutes: return 60000, nil @@ -91,14 +92,14 @@ func GregorianDuration(now time.Time, d int64) (int64, error) { case GregorianMonths: y, m, _ := now.Date() // Given the beginning of the month, subtract the end of the current month to get the duration - begin := time.Date(y, m, 1, 0, 0, 0, 0, now.Location()) - end := begin.AddDate(0, 1, 0).Add(-time.Nanosecond) + begin := clock.Date(y, m, 1, 0, 0, 0, 0, now.Location()) + end := begin.AddDate(0, 1, 0).Add(-clock.Nanosecond) return end.UnixNano() - begin.UnixNano()/1000000, nil case GregorianYears: y, _, _ := now.Date() // Given the beginning of the year, subtract the end of the current year to get the duration - begin := time.Date(y, time.January, 1, 0, 0, 0, 0, now.Location()) - end := begin.AddDate(1, 0, 0).Add(-time.Nanosecond) + begin := clock.Date(y, clock.January, 1, 0, 0, 0, 0, now.Location()) + end := begin.AddDate(1, 0, 0).Add(-clock.Nanosecond) return end.UnixNano() - begin.UnixNano()/1000000, nil } return 0, errors.New("behavior DURATION_IS_GREGORIAN is set; but `Duration` is not a valid gregorian interval") @@ -111,34 +112,34 @@ func GregorianDuration(now time.Time, d int64) (int64, error) { // // Example: If `now` is 2019-01-01 11:20:10 and `d` = GregorianMinutes then the return // expire time would be 2019-01-01 11:20:59 in milliseconds since epoch -func GregorianExpiration(now time.Time, d int64) (int64, error) { +func GregorianExpiration(now clock.Time, d int64) (int64, error) { switch d { case GregorianMinutes: - return now.Truncate(time.Minute). - Add(time.Minute-time.Nanosecond). + return now.Truncate(clock.Minute). + Add(clock.Minute-clock.Nanosecond). UnixNano() / 1000000, nil case GregorianHours: y, m, d := now.Date() // See time.Truncate() documentation on why we can' reliably use time.Truncate(Hour) here. - return time.Date(y, m, d, now.Hour(), 0, 0, 0, now.Location()). - Add(time.Hour-time.Nanosecond). + return clock.Date(y, m, d, now.Hour(), 0, 0, 0, now.Location()). + Add(clock.Hour-clock.Nanosecond). UnixNano() / 1000000, nil case GregorianDays: y, m, d := now.Date() - return time.Date(y, m, d, 23, 59, 59, int(time.Second-time.Nanosecond), now.Location()). + return clock.Date(y, m, d, 23, 59, 59, int(clock.Second-clock.Nanosecond), now.Location()). UnixNano() / 1000000, nil case GregorianWeeks: return 0, errors.New("`Duration = GregorianWeeks` not yet supported; consider making a PR!`") case GregorianMonths: y, m, _ := now.Date() - return time.Date(y, m, 1, 0, 0, 0, 0, now.Location()). - AddDate(0, 1, 0).Add(-time.Nanosecond). + return clock.Date(y, m, 1, 0, 0, 0, 0, now.Location()). + AddDate(0, 1, 0).Add(-clock.Nanosecond). UnixNano() / 1000000, nil case GregorianYears: y, _, _ := now.Date() - return time.Date(y, time.January, 1, 0, 0, 0, 0, now.Location()). + return clock.Date(y, clock.January, 1, 0, 0, 0, 0, now.Location()). AddDate(1, 0, 0). - Add(-time.Nanosecond). 
+ Add(-clock.Nanosecond). UnixNano() / 1000000, nil } return 0, errors.New("behavior DURATION_IS_GREGORIAN is set; but `Duration` is not a valid gregorian interval") diff --git a/interval_test.go b/interval_test.go index a4a53575..289fd93c 100644 --- a/interval_test.go +++ b/interval_test.go @@ -17,22 +17,23 @@ limitations under the License. package gubernator_test import ( + "testing" + "github.com/mailgun/gubernator" + "github.com/mailgun/holster/v3/clock" "github.com/stretchr/testify/assert" - "testing" - "time" ) func TestGregorianExpirationMinute(t *testing.T) { // Validate calculation assumption - now := time.Date(2019, time.November, 11, 00, 00, 00, 00, time.UTC) + now := clock.Date(2019, clock.November, 11, 00, 00, 00, 00, clock.UTC) expire, err := gubernator.GregorianExpiration(now, gubernator.GregorianMinutes) assert.Nil(t, err) - assert.Equal(t, time.Date(2019, time.November, 11, 00, 00, 59, 999000000, time.UTC), - time.Unix(0, expire*1000000).UTC()) + assert.Equal(t, clock.Date(2019, clock.November, 11, 00, 00, 59, 999000000, clock.UTC), + clock.Unix(0, expire*1000000).UTC()) // Expect the same expire time regardless of the current second or nsec - now = time.Date(2019, time.November, 11, 00, 00, 30, 100, time.UTC) + now = clock.Date(2019, clock.November, 11, 00, 00, 30, 100, clock.UTC) expire, err = gubernator.GregorianExpiration(now, gubernator.GregorianMinutes) assert.Nil(t, err) assert.Equal(t, int64(1573430459999), expire) @@ -40,14 +41,14 @@ func TestGregorianExpirationMinute(t *testing.T) { func TestGregorianExpirationHour(t *testing.T) { // Validate calculation assumption - now := time.Date(2019, time.November, 11, 00, 00, 00, 00, time.UTC) + now := clock.Date(2019, clock.November, 11, 00, 00, 00, 00, clock.UTC) expire, err := gubernator.GregorianExpiration(now, gubernator.GregorianHours) assert.Nil(t, err) - assert.Equal(t, time.Date(2019, time.November, 11, 00, 59, 59, 999000000, time.UTC), - time.Unix(0, expire*1000000).UTC()) + assert.Equal(t, clock.Date(2019, clock.November, 11, 00, 59, 59, 999000000, clock.UTC), + clock.Unix(0, expire*1000000).UTC()) // Expect the same expire time regardless of the current minute, second or nsec - now = time.Date(2019, time.November, 11, 00, 20, 1, 2134, time.UTC) + now = clock.Date(2019, clock.November, 11, 00, 20, 1, 2134, clock.UTC) expire, err = gubernator.GregorianExpiration(now, gubernator.GregorianHours) assert.Nil(t, err) assert.Equal(t, int64(1573433999999), expire) @@ -55,14 +56,14 @@ func TestGregorianExpirationHour(t *testing.T) { func TestGregorianExpirationDay(t *testing.T) { // Validate calculation assumption - now := time.Date(2019, time.November, 11, 00, 00, 00, 00, time.UTC) + now := clock.Date(2019, clock.November, 11, 00, 00, 00, 00, clock.UTC) expire, err := gubernator.GregorianExpiration(now, gubernator.GregorianDays) assert.Nil(t, err) - assert.Equal(t, time.Date(2019, time.November, 11, 23, 59, 59, 999000000, time.UTC), - time.Unix(0, expire*1000000).UTC()) + assert.Equal(t, clock.Date(2019, clock.November, 11, 23, 59, 59, 999000000, clock.UTC), + clock.Unix(0, expire*1000000).UTC()) // Expect the same expire time regardless of the current hour, minute, second or nsec - now = time.Date(2019, time.November, 11, 12, 10, 9, 2345, time.UTC) + now = clock.Date(2019, clock.November, 11, 12, 10, 9, 2345, clock.UTC) expire, err = gubernator.GregorianExpiration(now, gubernator.GregorianDays) assert.Nil(t, err) assert.Equal(t, int64(1573516799999), expire) @@ -70,44 +71,44 @@ func TestGregorianExpirationDay(t 
*testing.T) { func TestGregorianExpirationMonth(t *testing.T) { // Validate calculation assumption - now := time.Date(2019, time.November, 1, 00, 00, 00, 00, time.UTC) + now := clock.Date(2019, clock.November, 1, 00, 00, 00, 00, clock.UTC) expire, err := gubernator.GregorianExpiration(now, gubernator.GregorianMonths) assert.Nil(t, err) - assert.Equal(t, time.Date(2019, time.November, 30, 23, 59, 59, 999000000, time.UTC), - time.Unix(0, expire*1000000).UTC()) + assert.Equal(t, clock.Date(2019, clock.November, 30, 23, 59, 59, 999000000, clock.UTC), + clock.Unix(0, expire*1000000).UTC()) // Expect the same expire time regardless of the current day, minute, second or nsec - now = time.Date(2019, time.November, 11, 22, 2, 23, 0, time.UTC) + now = clock.Date(2019, clock.November, 11, 22, 2, 23, 0, clock.UTC) expire, err = gubernator.GregorianExpiration(now, gubernator.GregorianMonths) assert.Nil(t, err) assert.Equal(t, int64(1575158399999), expire) // January has 31 days - now = time.Date(2019, time.January, 1, 00, 00, 00, 00, time.UTC) + now = clock.Date(2019, clock.January, 1, 00, 00, 00, 00, clock.UTC) expire, err = gubernator.GregorianExpiration(now, gubernator.GregorianMonths) assert.Nil(t, err) - eom := time.Date(2019, time.January, 31, 23, 59, 59, 999999999, time.UTC) + eom := clock.Date(2019, clock.January, 31, 23, 59, 59, 999999999, clock.UTC) assert.Equal(t, eom.UnixNano()/1000000, expire) } func TestGregorianExpirationYear(t *testing.T) { // Validate calculation assumption - now := time.Date(2019, time.January, 1, 00, 00, 00, 00, time.UTC) + now := clock.Date(2019, clock.January, 1, 00, 00, 00, 00, clock.UTC) expire, err := gubernator.GregorianExpiration(now, gubernator.GregorianYears) assert.Nil(t, err) - assert.Equal(t, time.Date(2019, time.December, 31, 23, 59, 59, 999000000, time.UTC), - time.Unix(0, expire*1000000).UTC()) + assert.Equal(t, clock.Date(2019, clock.December, 31, 23, 59, 59, 999000000, clock.UTC), + clock.Unix(0, expire*1000000).UTC()) // Expect the same expire time regardless of the current month, day, minute, second or nsec - now = time.Date(2019, time.March, 1, 20, 30, 1231, 0, time.UTC) + now = clock.Date(2019, clock.March, 1, 20, 30, 1231, 0, clock.UTC) expire, err = gubernator.GregorianExpiration(now, gubernator.GregorianYears) assert.Nil(t, err) assert.Equal(t, int64(1577836799999), expire) } func TestGregorianExpirationInvalid(t *testing.T) { - now := time.Date(2019, time.January, 1, 00, 00, 00, 00, time.UTC) + now := clock.Date(2019, clock.January, 1, 00, 00, 00, 00, clock.UTC) expire, err := gubernator.GregorianExpiration(now, 99) assert.NotNil(t, err) assert.Equal(t, int64(0), expire) diff --git a/kubernetes.go b/kubernetes.go index 842c0453..aafd8206 100644 --- a/kubernetes.go +++ b/kubernetes.go @@ -17,10 +17,10 @@ limitations under the License. 
package gubernator import ( - "context" "fmt" "reflect" + "github.com/mailgun/holster/v3/setter" "github.com/mailgun/holster/v3/syncutil" "github.com/pkg/errors" "github.com/sirupsen/logrus" @@ -34,24 +34,21 @@ import ( ) type K8sPool struct { - client *kubernetes.Clientset - peers map[string]struct{} - cancelCtx context.CancelFunc - wg syncutil.WaitGroup - ctx context.Context - log *logrus.Entry - conf K8sPoolConfig - informer cache.SharedIndexInformer - done chan struct{} + informer cache.SharedIndexInformer + client *kubernetes.Clientset + wg syncutil.WaitGroup + log logrus.FieldLogger + conf K8sPoolConfig + done chan struct{} } type K8sPoolConfig struct { + Logger logrus.FieldLogger OnUpdate UpdateFunc Namespace string Selector string PodIP string PodPort string - Enabled bool } func NewK8sPool(conf K8sPoolConfig) (*K8sPool, error) { @@ -66,12 +63,12 @@ func NewK8sPool(conf K8sPoolConfig) (*K8sPool, error) { } pool := &K8sPool{ - log: logrus.WithField("category", "kubernetes-pool"), - peers: make(map[string]struct{}), done: make(chan struct{}), + log: conf.Logger, client: client, conf: conf, } + setter.SetDefault(&pool.log, logrus.WithField("category", "gubernator")) return pool, pool.start() } @@ -97,26 +94,26 @@ func (e *K8sPool) start() error { e.informer.AddEventHandler(cache.ResourceEventHandlerFuncs{ AddFunc: func(obj interface{}) { key, err := cache.MetaNamespaceKeyFunc(obj) - logrus.Debugf("Queue (Add) '%s' - %s", key, err) + e.log.Debugf("Queue (Add) '%s' - %s", key, err) if err != nil { - logrus.Errorf("while calling MetaNamespaceKeyFunc(): %s", err) + e.log.Errorf("while calling MetaNamespaceKeyFunc(): %s", err) return } }, UpdateFunc: func(obj, new interface{}) { key, err := cache.MetaNamespaceKeyFunc(obj) - logrus.Debugf("Queue (Update) '%s' - %s", key, err) + e.log.Debugf("Queue (Update) '%s' - %s", key, err) if err != nil { - logrus.Errorf("while calling MetaNamespaceKeyFunc(): %s", err) + e.log.Errorf("while calling MetaNamespaceKeyFunc(): %s", err) return } e.updatePeers() }, DeleteFunc: func(obj interface{}) { key, err := cache.MetaNamespaceKeyFunc(obj) - logrus.Debugf("Queue (Delete) '%s' - %s", key, err) + e.log.Debugf("Queue (Delete) '%s' - %s", key, err) if err != nil { - logrus.Errorf("while calling MetaNamespaceKeyFunc(): %s", err) + e.log.Errorf("while calling MetaNamespaceKeyFunc(): %s", err) return } e.updatePeers() @@ -134,23 +131,26 @@ func (e *K8sPool) start() error { } func (e *K8sPool) updatePeers() { - logrus.Debug("Fetching peer list from endpoints API") + e.log.Debug("Fetching peer list from endpoints API") var peers []PeerInfo for _, obj := range e.informer.GetStore().List() { endpoint, ok := obj.(*api_v1.Endpoints) if !ok { - logrus.Errorf("expected type v1.Endpoints got '%s' instead", reflect.TypeOf(obj).String()) + e.log.Errorf("expected type v1.Endpoints got '%s' instead", reflect.TypeOf(obj).String()) } for _, s := range endpoint.Subsets { for _, addr := range s.Addresses { - peer := PeerInfo{Address: fmt.Sprintf("%s:%s", addr.IP, e.conf.PodPort)} + // TODO(thrawn01): Might consider using the `namespace` as the `DataCenter`. We should + // do what ever k8s convention is for identifying a k8s cluster within a federated multi-data + // center setup. 
+ peer := PeerInfo{GRPCAddress: fmt.Sprintf("%s:%s", addr.IP, e.conf.PodPort)} if addr.IP == e.conf.PodIP { peer.IsOwner = true } peers = append(peers, peer) - logrus.Debugf("Peer: %+v\n", peer) + e.log.Debugf("Peer: %+v\n", peer) } } } diff --git a/memberlist.go b/memberlist.go index 171c1fee..03b5b649 100644 --- a/memberlist.go +++ b/memberlist.go @@ -1,95 +1,157 @@ +/* +Copyright 2018-2020 Mailgun Technologies Inc + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + package gubernator import ( + "bufio" "bytes" + "context" "encoding/gob" - "fmt" + "encoding/json" "io" - l "log" + "net" + "runtime" "strconv" - "strings" - "time" ml "github.com/hashicorp/memberlist" + "github.com/mailgun/holster/v3/clock" + "github.com/mailgun/holster/v3/retry" + "github.com/mailgun/holster/v3/setter" "github.com/pkg/errors" + "github.com/sirupsen/logrus" ) -type MemberlistPool struct { - memberlist *ml.Memberlist - conf MemberlistPoolConfig - events *memberlistEventHandler +type MemberListPool struct { + log logrus.FieldLogger + memberList *ml.Memberlist + conf MemberListPoolConfig + events *memberListEventHandler } -type MemberlistPoolConfig struct { +type MemberListPoolConfig struct { + // (Required) This is the address:port the member list protocol listen for other members on + MemberListAddress string + + // (Required) This is the address:port the member list will advertise to other members it finds AdvertiseAddress string - AdvertisePort int - NodeName string - KnownNodes []string - LoggerOutput io.Writer - Logger *l.Logger - DataCenter string - GubernatorPort int - OnUpdate UpdateFunc - Enabled bool + + // (Required) A list of nodes this member list instance can contact to find other members. + KnownNodes []string + + // (Required) A callback function which is called when the member list changes + OnUpdate UpdateFunc + + // (Optional) The name of the node this member list identifies itself as. 
+	NodeName string
+
+	// (Optional) An interface through which logging will occur (Usually *logrus.Entry)
+	Logger logrus.FieldLogger
+
+	// (Optional) The datacenter this instance belongs to
+	DataCenter string
 }
 
-func NewMemberlistPool(conf MemberlistPoolConfig) (*MemberlistPool, error) {
-	memberlistPool := &MemberlistPool{conf: conf}
+func NewMemberListPool(ctx context.Context, conf MemberListPoolConfig) (*MemberListPool, error) {
+	setter.SetDefault(&conf.Logger, logrus.WithField("category", "gubernator"))
+	m := &MemberListPool{
+		log:  conf.Logger,
+		conf: conf,
+	}
+
+	host, port, err := splitAddress(conf.MemberListAddress)
+	if err != nil {
+		return nil, errors.Wrapf(err, "MemberListAddress=`%s` is invalid", conf.MemberListAddress)
+	}
+
+	// Member list requires the address to be an ip address
+	if ip := net.ParseIP(host); ip == nil {
+		addrs, err := net.LookupHost(host)
+		if err != nil {
+			return nil, errors.Wrapf(err, "while performing host lookup for '%s'", host)
+		}
+		if len(addrs) == 0 {
+			return nil, errors.Errorf("net.LookupHost() returned no addresses for '%s'", host)
+		}
+		host = addrs[0]
+	}
 
-	// Configure memberlist event handler
-	memberlistPool.events = newMemberListEventHandler(conf.OnUpdate)
+	_, advPort, err := splitAddress(conf.AdvertiseAddress)
+	if err != nil {
+		return nil, errors.Wrapf(err, "AdvertiseAddress=`%s` is invalid", conf.AdvertiseAddress)
+	}
+
+	// Configure member list event handler
+	m.events = newMemberListEventHandler(m.log, conf)
 
-	// Configure memberlist
+	// Configure member list
 	config := ml.DefaultWANConfig()
-	config.Events = memberlistPool.events
-	config.AdvertiseAddr = conf.AdvertiseAddress
-	config.AdvertisePort = conf.AdvertisePort
+	config.Events = m.events
+	config.AdvertiseAddr = host
+	config.AdvertisePort = port
 
 	if conf.NodeName != "" {
 		config.Name = conf.NodeName
 	}
 
-	if conf.LoggerOutput != nil {
-		config.LogOutput = conf.LoggerOutput
-	}
-
-	if conf.Logger != nil {
-		config.Logger = conf.Logger
-	}
+	config.LogOutput = newLogWriter(m.log)
 
-	// Create and set memberlist
-	memberlist, err := ml.Create(config)
+	// Create and set member list
+	memberList, err := ml.Create(config)
 	if err != nil {
 		return nil, err
 	}
-	memberlistPool.memberlist = memberlist
+	m.memberList = memberList
 
 	// Prep metadata
-	gob.Register(memberlistMetadata{})
-	metadata := memberlistMetadata{DataCenter: conf.DataCenter, GubernatorPort: conf.GubernatorPort}
+	gob.Register(memberListMetadata{})
+	metadata := memberListMetadata{
+		DataCenter:       conf.DataCenter,
+		AdvertiseAddress: conf.AdvertiseAddress,
+		GubernatorPort:   advPort,
+	}
 
-	// Join memberlist pool
-	err = memberlistPool.joinPool(conf.KnownNodes, metadata)
+	// Join member list pool
+	err = m.joinPool(ctx, conf.KnownNodes, metadata)
 	if err != nil {
-		return nil, err
+		return nil, errors.Wrap(err, "while attempting to join the member-list pool")
 	}
 
-	return memberlistPool, nil
+	return m, nil
 }
 
-func (m *MemberlistPool) joinPool(knownNodes []string, metadata memberlistMetadata) error {
+func (m *MemberListPool) joinPool(ctx context.Context, knownNodes []string, metadata memberListMetadata) error {
 	// Get local node and set metadata
-	node := m.memberlist.LocalNode()
-	serializedMetadata, err := serializeMemberlistMetadata(metadata)
+	node := m.memberList.LocalNode()
+	serializedMetadata, err := serializeMemberListMetadata(metadata)
 	if err != nil {
 		return err
 	}
 	node.Meta = serializedMetadata
 
-	// Join memberlist
-	_, err = m.memberlist.Join(knownNodes)
+	err = retry.Until(ctx, retry.Interval(clock.Millisecond*300), func(ctx context.Context, i int) error {
+		// Join member list
+		_, err = m.memberList.Join(knownNodes)
+		if err != nil {
+			return errors.Wrap(err, "while joining member-list")
+		}
+		return nil
+	})
 	if err != nil {
-		return errors.Wrap(err, "while joining memberlist")
+		return errors.Wrap(err, "timed out attempting to join member list")
 	}
 
 	// Add the local node to the event handler's peer list
@@ -98,57 +160,65 @@ func (m *MemberlistPool) joinPool(knownNodes []string, metadata memberlistMetada
 	return nil
 }
 
-func (m *MemberlistPool) Close() {
-	err := m.memberlist.Leave(time.Second)
+func (m *MemberListPool) Close() {
+	err := m.memberList.Leave(clock.Second)
 	if err != nil {
-		log.Warn(errors.Wrap(err, "while leaving memberlist"))
+		m.log.Warn(errors.Wrap(err, "while leaving member-list"))
 	}
 }
 
-type memberlistEventHandler struct {
-	peers    map[string]PeerInfo
-	OnUpdate UpdateFunc
+type memberListEventHandler struct {
+	peers map[string]PeerInfo
+	log   logrus.FieldLogger
+	conf  MemberListPoolConfig
 }
 
-func newMemberListEventHandler(onUpdate UpdateFunc) *memberlistEventHandler {
-	eventhandler := memberlistEventHandler{OnUpdate: onUpdate}
-	eventhandler.peers = make(map[string]PeerInfo)
-	return &eventhandler
+func newMemberListEventHandler(log logrus.FieldLogger, conf MemberListPoolConfig) *memberListEventHandler {
+	handler := memberListEventHandler{
+		conf: conf,
+		log:  log,
+	}
+	handler.peers = make(map[string]PeerInfo)
+	return &handler
 }
 
-func (e *memberlistEventHandler) addPeer(node *ml.Node) {
+func (e *memberListEventHandler) addPeer(node *ml.Node) {
 	ip := getIP(node.Address())
 
 	// Deserialize metadata
-	metadata, err := deserializeMemberlistMetadata(node.Meta)
+	metadata, err := deserializeMemberListMetadata(node.Meta)
 	if err != nil {
-		log.Warn(errors.Wrap(err, "while adding to peers"))
+		e.log.WithError(err).Warn("while adding to peers")
 	} else {
-		// Construct Gubernator address and create PeerInfo
-		gubernatorAddress := makeAddress(ip, metadata.GubernatorPort)
-		e.peers[ip] = PeerInfo{Address: gubernatorAddress, DataCenter: metadata.DataCenter}
+		// Handle deprecated GubernatorPort
+		if metadata.AdvertiseAddress == "" {
+			metadata.AdvertiseAddress = makeAddress(ip, metadata.GubernatorPort)
+		}
+		e.peers[ip] = PeerInfo{GRPCAddress: metadata.AdvertiseAddress, DataCenter: metadata.DataCenter}
 		e.callOnUpdate()
 	}
 }
 
-func (e *memberlistEventHandler) NotifyJoin(node *ml.Node) {
+func (e *memberListEventHandler) NotifyJoin(node *ml.Node) {
 	ip := getIP(node.Address())
 
 	// Deserialize metadata
-	metadata, err := deserializeMemberlistMetadata(node.Meta)
+	metadata, err := deserializeMemberListMetadata(node.Meta)
 	if err != nil {
 		// This is called during memberlist initialization due to the fact that the local node
 		// has no metadata yet
-		log.Warn(errors.Wrap(err, "while joining memberlist"))
+		e.log.WithError(err).Warn("while deserializing member-list metadata")
 	} else {
-		// Construct Gubernator address and create PeerInfo
-		gubernatorAddress := makeAddress(ip, metadata.GubernatorPort)
-		e.peers[ip] = PeerInfo{Address: gubernatorAddress, DataCenter: metadata.DataCenter}
+		// Handle deprecated GubernatorPort
+		if metadata.AdvertiseAddress == "" {
+			metadata.AdvertiseAddress = makeAddress(ip, metadata.GubernatorPort)
+		}
+		e.peers[ip] = PeerInfo{GRPCAddress: metadata.AdvertiseAddress, DataCenter: metadata.DataCenter}
 		e.callOnUpdate()
 	}
 }
 
-func (e *memberlistEventHandler) NotifyLeave(node *ml.Node) {
+func (e *memberListEventHandler) NotifyLeave(node *ml.Node) {
 	ip := getIP(node.Address())
 
 	// Remove PeerInfo
@@ -157,70
+227,97 @@ func (e *memberlistEventHandler) NotifyLeave(node *ml.Node) { e.callOnUpdate() } -func (e *memberlistEventHandler) NotifyUpdate(node *ml.Node) { +func (e *memberListEventHandler) NotifyUpdate(node *ml.Node) { ip := getIP(node.Address()) // Deserialize metadata - metadata, err := deserializeMemberlistMetadata(node.Meta) + metadata, err := deserializeMemberListMetadata(node.Meta) if err != nil { - log.Warn(errors.Wrap(err, "while updating memberlist")) + e.log.WithError(err).Warn("while updating member-list") } else { // Construct Gubernator address and create PeerInfo gubernatorAddress := makeAddress(ip, metadata.GubernatorPort) - e.peers[ip] = PeerInfo{Address: gubernatorAddress, DataCenter: metadata.DataCenter} + e.peers[ip] = PeerInfo{GRPCAddress: gubernatorAddress, DataCenter: metadata.DataCenter} e.callOnUpdate() } } -func (e *memberlistEventHandler) callOnUpdate() { - var peers = []PeerInfo{} +func (e *memberListEventHandler) callOnUpdate() { + var peers []PeerInfo for _, p := range e.peers { + if p.GRPCAddress == e.conf.AdvertiseAddress { + p.IsOwner = true + } peers = append(peers, p) } - - e.OnUpdate(peers) + e.conf.OnUpdate(peers) } func getIP(address string) string { - return strings.Split(address, ":")[0] + addr, _, _ := net.SplitHostPort(address) + return addr } func makeAddress(ip string, port int) string { - return fmt.Sprintf("%s:%s", ip, strconv.Itoa(port)) + return net.JoinHostPort(ip, strconv.Itoa(port)) } -type memberlistMetadata struct { - DataCenter string +type memberListMetadata struct { + DataCenter string + AdvertiseAddress string + // Deprecated GubernatorPort int } -func serializeMemberlistMetadata(metadata memberlistMetadata) ([]byte, error) { - buf := bytes.Buffer{} - encoder := gob.NewEncoder(&buf) - - err := encoder.Encode(metadata) +func serializeMemberListMetadata(metadata memberListMetadata) ([]byte, error) { + b, err := json.Marshal(&metadata) if err != nil { - log.Warn(errors.Wrap(err, "error encoding")) - return nil, err + return nil, errors.Wrap(err, "error marshalling metadata as JSON") } - - return buf.Bytes(), nil + return b, nil } -func deserializeMemberlistMetadata(metadataAsByteSlice []byte) (*memberlistMetadata, error) { - metadata := memberlistMetadata{} - buf := bytes.Buffer{} - - buf.Write(metadataAsByteSlice) +func deserializeMemberListMetadata(b []byte) (*memberListMetadata, error) { + var metadata memberListMetadata + if err := json.Unmarshal(b, &metadata); err != nil { + decoder := gob.NewDecoder(bytes.NewBuffer(b)) + if err := decoder.Decode(&metadata); err != nil { + return nil, errors.Wrap(err, "error decoding metadata") + } + } + return &metadata, nil +} - decoder := gob.NewDecoder(&buf) +func newLogWriter(log logrus.FieldLogger) *io.PipeWriter { + reader, writer := io.Pipe() + + go func() { + scanner := bufio.NewScanner(reader) + for scanner.Scan() { + log.Info(scanner.Text()) + } + if err := scanner.Err(); err != nil { + log.Errorf("Error while reading from Writer: %s", err) + } + reader.Close() + }() + runtime.SetFinalizer(writer, func(w *io.PipeWriter) { + writer.Close() + }) + + return writer +} - err := decoder.Decode(&metadata) +func splitAddress(addr string) (string, int, error) { + host, port, err := net.SplitHostPort(addr) if err != nil { - log.Warn(errors.Wrap(err, "error decoding")) - return nil, err + return host, 0, errors.New(" expected format is `address:port`") } - return &metadata, nil + intPort, err := strconv.Atoi(port) + if err != nil { + return host, intPort, errors.Wrap(err, "port must be a number") + 
}
+	return intPort, nil
+}
diff --git a/multiregion.go b/multiregion.go
index 5843ca7c..6c49c96f 100644
--- a/multiregion.go
+++ b/multiregion.go
@@ -9,14 +9,15 @@ type mutliRegionManager struct {
 	reqQueue chan *RateLimitReq
 	wg       syncutil.WaitGroup
 	conf     BehaviorConfig
-	log      *logrus.Entry
-	instance *Instance
+	log      logrus.FieldLogger
+	instance *V1Instance
 }
 
-func newMultiRegionManager(conf BehaviorConfig, instance *Instance) *mutliRegionManager {
+func newMultiRegionManager(conf BehaviorConfig, instance *V1Instance) *mutliRegionManager {
 	mm := mutliRegionManager{
 		conf:     conf,
 		instance: instance,
+		log:      instance.log,
 		reqQueue: make(chan *RateLimitReq, 0),
 	}
 	mm.runAsyncReqs()
@@ -48,7 +49,7 @@ func (mm *mutliRegionManager) runAsyncReqs() {
 			// Send the hits if we reached our batch limit
 			if len(hits) == mm.conf.MultiRegionBatchLimit {
 				for dc, picker := range mm.instance.GetRegionPickers() {
-					log.Infof("Sending %v hit(s) to %s picker", len(hits), dc)
+					mm.log.Debugf("Sending %v hit(s) to %s picker", len(hits), dc)
 					mm.sendHits(hits, picker)
 				}
 				hits = make(map[string]*RateLimitReq)
@@ -62,7 +63,7 @@ func (mm *mutliRegionManager) runAsyncReqs() {
 		case <-interval.C:
 			if len(hits) > 0 {
 				for dc, picker := range mm.instance.GetRegionPickers() {
-					log.Infof("Sending %v hit(s) to %s picker", len(hits), dc)
+					mm.log.Debugf("Sending %v hit(s) to %s picker", len(hits), dc)
 					mm.sendHits(hits, picker)
 				}
 				hits = make(map[string]*RateLimitReq)
diff --git a/net.go b/net.go
new file mode 100644
index 00000000..1a252920
--- /dev/null
+++ b/net.go
@@ -0,0 +1,70 @@
+package gubernator
+
+import (
+	"net"
+	"os"
+
+	"github.com/mailgun/holster/v3/slice"
+	"github.com/pkg/errors"
+)
+
+// If the passed address is "0.0.0.0" or "::", attempts to discover the actual ip address of the host
+func ResolveHostIP(addr string) (string, error) {
+	if slice.ContainsString(addr, []string{"0.0.0.0", "::", "0:0:0:0:0:0:0:0"}, nil) {
+		// Use the hostname as the advertise address as it's most likely to be the external interface
+		domainName, err := os.Hostname()
+		if err != nil {
+			addr, err = discoverIP()
+			if err != nil {
+				return "", errors.Wrapf(err, "while discovering ip for '%s'", addr)
+			}
+			return addr, nil
+		}
+		addrs, err := net.LookupHost(domainName)
+		if err != nil {
+			return "", errors.Wrapf(err, "while performing host lookup for '%s'", domainName)
+		}
+		if len(addrs) == 0 {
+			return "", errors.Errorf("net.LookupHost() returned no addresses for '%s'", domainName)
+		}
+		return addrs[0], nil
+	}
+	return addr, nil
+}
+
+func discoverIP() (string, error) {
+	ifaces, err := net.Interfaces()
+	if err != nil {
+		return "", err
+	}
+	for _, iface := range ifaces {
+		if iface.Flags&net.FlagUp == 0 {
+			continue // interface down
+		}
+		if iface.Flags&net.FlagLoopback != 0 {
+			continue // loopback interface
+		}
+		addrs, err := iface.Addrs()
+		if err != nil {
+			return "", err
+		}
+		for _, addr := range addrs {
+			var ip net.IP
+			switch v := addr.(type) {
+			case *net.IPNet:
+				ip = v.IP
+			case *net.IPAddr:
+				ip = v.IP
+			}
+			if ip == nil || ip.IsLoopback() {
+				continue
+			}
+			ip = ip.To4()
+			if ip == nil {
+				continue // not an ipv4 address
+			}
+			return ip.String(), nil
+		}
+	}
+	return "", errors.New("unable to detect external ip address; please set `GUBER_ADVERTISE_ADDRESS`")
+}
diff --git a/peer_client.go b/peer_client.go
index daef6bac..473e1f45 100644
--- a/peer_client.go
+++ b/peer_client.go
@@ -20,8 +20,8 @@ import (
 	"context"
 	"fmt"
 	"sync"
-	"time"
 
+	"github.com/mailgun/holster/v3/clock"
"github.com/mailgun/holster/v3/collections" "github.com/pkg/errors" "google.golang.org/grpc" @@ -108,10 +108,10 @@ func (c *PeerClient) connect() error { } var err error - // c.conn, err = grpc.Dial(fmt.Sprintf("%s:%s", c.info.Address, ""), grpc.WithInsecure()) - c.conn, err = grpc.Dial(c.info.Address, grpc.WithInsecure()) + // c.conn, err = grpc.Dial(fmt.Sprintf("%s:%s", c.info.GRPCAddress, ""), grpc.WithInsecure()) + c.conn, err = grpc.Dial(c.info.GRPCAddress, grpc.WithInsecure()) if err != nil { - return c.setLastErr(&PeerErr{err: errors.Wrapf(err, "failed to dial peer %s", c.info.Address)}) + return c.setLastErr(&PeerErr{err: errors.Wrapf(err, "failed to dial peer %s", c.info.GRPCAddress)}) } c.client = NewPeersV1Client(c.conn) c.status = peerConnected @@ -122,6 +122,11 @@ func (c *PeerClient) connect() error { return nil } +// PeerInfo returns PeerInfo struct that describes this PeerClient +func (c *PeerClient) PeerInfo() PeerInfo { + return c.info +} + // GetPeerRateLimit forwards a rate limit request to a peer. If the rate limit has `behavior == BATCHING` configured // this method will attempt to batch the rate limits func (c *PeerClient) GetPeerRateLimit(ctx context.Context, r *RateLimitReq) (*RateLimitResp, error) { @@ -196,11 +201,11 @@ func (c *PeerClient) setLastErr(err error) error { } // Prepend client address to error - errWithHostname := errors.Wrap(err, fmt.Sprintf("from host %s", c.info.Address)) + errWithHostname := errors.Wrap(err, fmt.Sprintf("from host %s", c.info.GRPCAddress)) key := err.Error() // Add error to the cache with a TTL of 5 minutes - c.lastErrs.AddWithTTL(key, errWithHostname, time.Minute*5) + c.lastErrs.AddWithTTL(key, errWithHostname, clock.Minute*5) return err } diff --git a/peer_client_test.go b/peer_client_test.go index af0e3f1e..18d45688 100644 --- a/peer_client_test.go +++ b/peer_client_test.go @@ -5,10 +5,10 @@ import ( "runtime" "sync" "testing" - "time" "github.com/mailgun/gubernator" "github.com/mailgun/gubernator/cluster" + "github.com/mailgun/holster/v3/clock" "github.com/stretchr/testify/assert" ) @@ -27,12 +27,12 @@ func TestPeerClientShutdown(t *testing.T) { } config := gubernator.BehaviorConfig{ - BatchTimeout: 250 * time.Millisecond, - BatchWait: 250 * time.Millisecond, + BatchTimeout: 250 * clock.Millisecond, + BatchWait: 250 * clock.Millisecond, BatchLimit: 100, - GlobalSyncWait: 250 * time.Millisecond, - GlobalTimeout: 250 * time.Millisecond, + GlobalSyncWait: 250 * clock.Millisecond, + GlobalTimeout: 250 * clock.Millisecond, GlobalBatchLimit: 100, } @@ -40,7 +40,6 @@ func TestPeerClientShutdown(t *testing.T) { c := cases[i] t.Run(c.Name, func(t *testing.T) { - client := gubernator.NewPeerClient(config, cluster.GetRandomPeer()) wg := sync.WaitGroup{} diff --git a/region_picker.go b/region_picker.go index 29dda7bf..dc9f3158 100644 --- a/region_picker.go +++ b/region_picker.go @@ -15,7 +15,7 @@ type RegionPeerPicker interface { // RegionPicker encapsulates pickers for a set of regions type RegionPicker struct { - *ConsistantHash + *ReplicatedConsistentHash // A map of all the pickers by region regions map[string]PeerPicker @@ -25,21 +25,21 @@ type RegionPicker struct { reqQueue chan *RateLimitReq } -func NewRegionPicker(fn HashFunc) *RegionPicker { +func NewRegionPicker(fn HashFunc64) *RegionPicker { rp := &RegionPicker{ - regions: make(map[string]PeerPicker), - reqQueue: make(chan *RateLimitReq, 0), - ConsistantHash: NewConsistantHash(fn), + regions: make(map[string]PeerPicker), + reqQueue: make(chan *RateLimitReq, 0), + 
ReplicatedConsistentHash: NewReplicatedConsistentHash(fn, DefaultReplicas), } return rp } func (rp *RegionPicker) New() RegionPeerPicker { - hash := rp.ConsistantHash.New().(*ConsistantHash) + hash := rp.ReplicatedConsistentHash.New().(*ReplicatedConsistentHash) return &RegionPicker{ - regions: make(map[string]PeerPicker), - reqQueue: make(chan *RateLimitReq, 0), - ConsistantHash: hash, + regions: make(map[string]PeerPicker), + reqQueue: make(chan *RateLimitReq, 0), + ReplicatedConsistentHash: hash, } } @@ -88,7 +88,7 @@ func (rp *RegionPicker) Peers() []*PeerClient { func (rp *RegionPicker) Add(peer *PeerClient) { picker, ok := rp.regions[peer.info.DataCenter] if !ok { - picker = rp.ConsistantHash.New() + picker = rp.ReplicatedConsistentHash.New() rp.regions[peer.info.DataCenter] = picker } picker.Add(peer) diff --git a/replicated_hash.go b/replicated_hash.go index 9a1b3b7e..ed02042b 100644 --- a/replicated_hash.go +++ b/replicated_hash.go @@ -17,6 +17,8 @@ limitations under the License. package gubernator import ( + "crypto/md5" + "fmt" "sort" "strconv" @@ -31,7 +33,7 @@ type HashFunc64 func(data []byte) uint64 var DefaultHash64 HashFunc64 = fnv1.HashBytes64 // Implements PeerPicker -type ReplicatedConsistantHash struct { +type ReplicatedConsistentHash struct { hashFunc HashFunc64 peerKeys []peerInfo peers map[string]*PeerClient @@ -43,8 +45,8 @@ type peerInfo struct { peer *PeerClient } -func NewReplicatedConsistantHash(fn HashFunc64, replicas int) *ReplicatedConsistantHash { - ch := &ReplicatedConsistantHash{ +func NewReplicatedConsistentHash(fn HashFunc64, replicas int) *ReplicatedConsistentHash { + ch := &ReplicatedConsistentHash{ hashFunc: fn, peers: make(map[string]*PeerClient), replicas: replicas, @@ -56,15 +58,15 @@ func NewReplicatedConsistantHash(fn HashFunc64, replicas int) *ReplicatedConsist return ch } -func (ch *ReplicatedConsistantHash) New() PeerPicker { - return &ReplicatedConsistantHash{ +func (ch *ReplicatedConsistentHash) New() PeerPicker { + return &ReplicatedConsistentHash{ hashFunc: ch.hashFunc, peers: make(map[string]*PeerClient), replicas: ch.replicas, } } -func (ch *ReplicatedConsistantHash) Peers() []*PeerClient { +func (ch *ReplicatedConsistentHash) Peers() []*PeerClient { var results []*PeerClient for _, v := range ch.peers { results = append(results, v) @@ -73,11 +75,12 @@ func (ch *ReplicatedConsistantHash) Peers() []*PeerClient { } // Adds a peer to the hash -func (ch *ReplicatedConsistantHash) Add(peer *PeerClient) { - ch.peers[peer.info.Address] = peer +func (ch *ReplicatedConsistentHash) Add(peer *PeerClient) { + ch.peers[peer.info.GRPCAddress] = peer + key := fmt.Sprintf("%x", md5.Sum([]byte(peer.info.GRPCAddress))) for i := 0; i < ch.replicas; i++ { - hash := ch.hashFunc(strToBytesUnsafe(strconv.Itoa(i) + peer.info.Address)) + hash := ch.hashFunc(strToBytesUnsafe(strconv.Itoa(i) + key)) ch.peerKeys = append(ch.peerKeys, peerInfo{ hash: hash, peer: peer, @@ -88,17 +91,17 @@ func (ch *ReplicatedConsistantHash) Add(peer *PeerClient) { } // Returns number of peers in the picker -func (ch *ReplicatedConsistantHash) Size() int { +func (ch *ReplicatedConsistentHash) Size() int { return len(ch.peers) } // Returns the peer by hostname -func (ch *ReplicatedConsistantHash) GetByPeerInfo(peer PeerInfo) *PeerClient { - return ch.peers[peer.Address] +func (ch *ReplicatedConsistentHash) GetByPeerInfo(peer PeerInfo) *PeerClient { + return ch.peers[peer.GRPCAddress] } // Given a key, return the peer that key is assigned too -func (ch *ReplicatedConsistantHash) Get(key 
string) (*PeerClient, error) { +func (ch *ReplicatedConsistentHash) Get(key string) (*PeerClient, error) { if ch.Size() == 0 { return nil, errors.New("unable to pick a peer; pool is empty") } diff --git a/replicated_hash_test.go b/replicated_hash_test.go index cb2fd5e9..d297ed29 100644 --- a/replicated_hash_test.go +++ b/replicated_hash_test.go @@ -4,8 +4,8 @@ import ( "math/rand" "net" "testing" - "time" + "github.com/mailgun/holster/v3/clock" "github.com/segmentio/fasthash/fnv1" "github.com/segmentio/fasthash/fnv1a" "github.com/stretchr/testify/assert" @@ -15,33 +15,33 @@ func TestReplicatedConsistantHash(t *testing.T) { hosts := []string{"a.svc.local", "b.svc.local", "c.svc.local"} t.Run("Size", func(t *testing.T) { - hash := NewReplicatedConsistantHash(nil, DefaultReplicas) + hash := NewReplicatedConsistentHash(nil, DefaultReplicas) for _, h := range hosts { - hash.Add(&PeerClient{info: PeerInfo{Address: h}}) + hash.Add(&PeerClient{info: PeerInfo{GRPCAddress: h}}) } assert.Equal(t, len(hosts), hash.Size()) }) t.Run("Host", func(t *testing.T) { - hash := NewReplicatedConsistantHash(nil, DefaultReplicas) + hash := NewReplicatedConsistentHash(nil, DefaultReplicas) hostMap := map[string]*PeerClient{} for _, h := range hosts { - peer := &PeerClient{info: PeerInfo{Address: h}} + peer := &PeerClient{info: PeerInfo{GRPCAddress: h}} hash.Add(peer) hostMap[h] = peer } for host, peer := range hostMap { - assert.Equal(t, peer, hash.GetByPeerInfo(PeerInfo{Address: host})) + assert.Equal(t, peer, hash.GetByPeerInfo(PeerInfo{GRPCAddress: host})) } }) t.Run("distribution", func(t *testing.T) { const cases = 10000 - rand.Seed(time.Now().Unix()) + rand.Seed(clock.Now().Unix()) strings := make([]string, cases) @@ -58,17 +58,17 @@ func TestReplicatedConsistantHash(t *testing.T) { for name, hashFunc := range hashFuncs { t.Run(name, func(t *testing.T) { - hash := NewReplicatedConsistantHash(hashFunc, DefaultReplicas) + hash := NewReplicatedConsistentHash(hashFunc, DefaultReplicas) hostMap := map[string]int{} for _, h := range hosts { - hash.Add(&PeerClient{info: PeerInfo{Address: h}}) + hash.Add(&PeerClient{info: PeerInfo{GRPCAddress: h}}) hostMap[h] = 0 } for i := range strings { peer, _ := hash.Get(strings[i]) - hostMap[peer.info.Address]++ + hostMap[peer.info.GRPCAddress]++ } for host, a := range hostMap { @@ -93,10 +93,10 @@ func BenchmarkReplicatedConsistantHash(b *testing.B) { ips[i] = net.IPv4(byte(i>>24), byte(i>>16), byte(i>>8), byte(i)).String() } - hash := NewReplicatedConsistantHash(hashFunc, DefaultReplicas) + hash := NewReplicatedConsistentHash(hashFunc, DefaultReplicas) hosts := []string{"a.svc.local", "b.svc.local", "c.svc.local"} for _, h := range hosts { - hash.Add(&PeerClient{info: PeerInfo{Address: h}}) + hash.Add(&PeerClient{info: PeerInfo{GRPCAddress: h}}) } b.ResetTimer() diff --git a/store_test.go b/store_test.go index 7189d75f..416074b7 100644 --- a/store_test.go +++ b/store_test.go @@ -18,32 +18,76 @@ package gubernator_test import ( "context" + "fmt" + "net" "testing" - "time" "github.com/mailgun/gubernator" - "github.com/mailgun/gubernator/cluster" + "github.com/mailgun/holster/v3/clock" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "google.golang.org/grpc" ) +type v1Server struct { + conf gubernator.Config + listener net.Listener + srv *gubernator.V1Instance +} + +func (s *v1Server) Close() { + s.conf.GRPCServer.GracefulStop() + s.srv.Close() +} + +// Start a single instance of V1Server with the provided config and listening address. 
+func newV1Server(t *testing.T, address string, conf gubernator.Config) *v1Server { + t.Helper() + conf.GRPCServer = grpc.NewServer() + + srv, err := gubernator.NewV1Instance(conf) + require.NoError(t, err) + + listener, err := net.Listen("tcp", address) + require.NoError(t, err) + + go func() { + if err := conf.GRPCServer.Serve(listener); err != nil { + fmt.Printf("while serving: %s\n", err) + } + }() + + srv.SetPeers([]gubernator.PeerInfo{{GRPCAddress: listener.Addr().String(), IsOwner: true}}) + + ctx, cancel := context.WithTimeout(context.Background(), clock.Second*10) + + err = gubernator.WaitForConnect(ctx, []string{listener.Addr().String()}) + require.NoError(t, err) + cancel() + + return &v1Server{ + conf: conf, + listener: listener, + srv: srv, + } +} + func TestLoader(t *testing.T) { loader := gubernator.NewMockLoader() - ins, err := cluster.StartInstance("", gubernator.Config{ + srv := newV1Server(t, "", gubernator.Config{ Behaviors: gubernator.BehaviorConfig{ - GlobalSyncWait: time.Millisecond * 50, // Suitable for testing but not production - GlobalTimeout: time.Second, + GlobalSyncWait: clock.Millisecond * 50, // Suitable for testing but not production + GlobalTimeout: clock.Second, }, Loader: loader, }) - assert.Nil(t, err) // loader.Load() should have been called for gubernator startup assert.Equal(t, 1, loader.Called["Load()"]) assert.Equal(t, 0, loader.Called["Save()"]) - client, err := gubernator.DialV1Server(ins.Address) + client, err := gubernator.DialV1Server(srv.listener.Addr().String()) assert.Nil(t, err) resp, err := client.GetRateLimits(context.Background(), &gubernator.GetRateLimitsReq{ @@ -63,8 +107,7 @@ func TestLoader(t *testing.T) { require.Equal(t, 1, len(resp.Responses)) require.Equal(t, "", resp.Responses[0].Error) - err = ins.Stop() - require.Nil(t, err) + srv.Close() // Loader.Save() should been called during gubernator shutdown assert.Equal(t, 1, loader.Called["Load()"]) @@ -164,20 +207,19 @@ func TestStore(t *testing.T) { t.Run(tt.name, func(t *testing.T) { store := gubernator.NewMockStore() - ins, err := cluster.StartInstance("", gubernator.Config{ + srv := newV1Server(t, "", gubernator.Config{ Behaviors: gubernator.BehaviorConfig{ - GlobalSyncWait: time.Millisecond * 50, // Suitable for testing but not production - GlobalTimeout: time.Second, + GlobalSyncWait: clock.Millisecond * 50, // Suitable for testing but not production + GlobalTimeout: clock.Second, }, Store: store, }) - assert.Nil(t, err) // No calls to store assert.Equal(t, 0, store.Called["OnChange()"]) assert.Equal(t, 0, store.Called["Get()"]) - client, err := gubernator.DialV1Server(ins.Address) + client, err := gubernator.DialV1Server(srv.listener.Addr().String()) assert.Nil(t, err) req := gubernator.RateLimitReq{ diff --git a/version b/version index 2003b639..9c218192 100644 --- a/version +++ b/version @@ -1 +1 @@ -0.9.2 +1.0.0-rc.1
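For reference, a minimal usage sketch of two exported helpers touched by this release: GregorianExpiration, now driven by the holster clock package, and the new ResolveHostIP used when GUBER_ADVERTISE_ADDRESS is not set. The main wrapper and the guber import alias are illustrative assumptions and are not part of the patch itself.

    package main

    import (
    	"fmt"

    	guber "github.com/mailgun/gubernator"
    	"github.com/mailgun/holster/v3/clock"
    )

    func main() {
    	// Expiration at the end of the current Gregorian minute, in milliseconds
    	// since epoch (mirrors the clock-based calls in algorithms.go).
    	expire, err := guber.GregorianExpiration(clock.Now(), guber.GregorianMinutes)
    	if err != nil {
    		panic(err)
    	}
    	fmt.Println("minute bucket expires at:", expire)

    	// Resolve a wildcard bind address to a concrete host IP, as the server
    	// does when detecting the advertise address.
    	addr, err := guber.ResolveHostIP("0.0.0.0")
    	if err != nil {
    		panic(err)
    	}
    	fmt.Println("advertise address:", addr)
    }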