forked from benmanns/goworker
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathgoworker.go
268 lines (230 loc) · 6.32 KB
/
goworker.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
package goworker
import (
"crypto/tls"
"crypto/x509"
"encoding/json"
"fmt"
"io/ioutil"
"os"
"strconv"
"sync"
"time"
"github.com/go-redis/redis/v7"
"golang.org/x/net/context"
"github.com/cihub/seelog"
)
var (
	// logger is the process-wide seelog logger, created in Init.
	logger seelog.LoggerInterface
	// client is the shared Redis connection used by all goworker operations.
	client *redis.Client
	// ctx is the background context attached to the Redis client in Init.
	ctx context.Context
	// initMutex serializes Init and Close so they are safe to call concurrently.
	initMutex sync.Mutex
	// initialized guards against double initialization / double close.
	initialized bool
)
const (
	// keyForCleaningExpiredRetries is the Redis key (prefixed with the
	// configured namespace) used as a distributed lock so that only one
	// worker runs the expired-retries cleanup at a time.
	keyForCleaningExpiredRetries = "cleaning_expired_retried_in_progress"
)
var (
	// cleaningExpiredRetriesInterval is how often expired retries are
	// cleaned when MaxAgeRetries is set; half of it is also used as the
	// TTL of the cleanup lock (see cleanExpiredRetries).
	cleaningExpiredRetriesInterval = time.Minute
)
// workerSettings holds the process-wide configuration. Set it via
// SetSettings (or the flag parsing in flags()) before calling Init or Work.
var workerSettings WorkerSettings
// WorkerSettings is the configuration for the goworker process.
type WorkerSettings struct {
	// QueuesString is the raw, comma-separated queue list; parsed into Queues.
	QueuesString string
	Queues       queuesFlag
	// IntervalFloat is the raw polling interval in seconds; parsed into Interval.
	IntervalFloat float64
	Interval      intervalFlag
	// Concurrency is the number of workers started by Work.
	Concurrency int
	// Connections is the size of the Redis connection pool.
	// NOTE(review): not referenced in this file — presumably consumed by
	// the poller/worker code; confirm before documenting further.
	Connections int
	// URI is the Redis connection URI passed to redis.ParseURL.
	URI string
	// Namespace prefixes every Redis key used by goworker (e.g. "resque:").
	Namespace string
	// ExitOnComplete makes Work return once the queues are empty.
	ExitOnComplete bool
	IsStrict       bool
	// UseNumber makes JSON decoding use json.Number instead of float64.
	// NOTE(review): consumed elsewhere in the package — confirm.
	UseNumber bool
	// SkipTLSVerify disables TLS certificate verification (InsecureSkipVerify).
	SkipTLSVerify bool
	// TLSCertPath, when non-empty, points at a PEM file appended to the
	// root CA pool used for the Redis TLS connection.
	TLSCertPath string
	// MaxAgeRetries, when non-zero, enables periodic removal of failed
	// jobs whose last retry is older than this duration.
	MaxAgeRetries time.Duration

	ForcePrune bool
	// closed is closed by Close once cleanup is done; exposed via Closed().
	closed chan struct{}
}
// SetSettings replaces the process-wide worker configuration. Call it
// before Init or Work; it is not synchronized against a running worker.
func SetSettings(settings WorkerSettings) {
	workerSettings = settings
}
// Init initializes the goworker process. This will be
// called by the Work function, but may be used by programs
// that wish to access goworker functions and configuration
// without actually processing jobs.
func Init() error {
	initMutex.Lock()
	defer initMutex.Unlock()
	if !initialized {
		var err error
		// Logger first, so later steps (and callers) can log.
		logger, err = seelog.LoggerFromWriterWithMinLevel(os.Stdout, seelog.InfoLvl)
		if err != nil {
			return err
		}

		// Parse/validate command-line flags into workerSettings.
		if err := flags(); err != nil {
			return err
		}
		ctx = context.Background()

		opts, err := redis.ParseURL(workerSettings.URI)
		if err != nil {
			return err
		}

		// Optional custom root CA for TLS connections to Redis.
		if len(workerSettings.TLSCertPath) > 0 {
			certPool, err := getCertPool()
			if err != nil {
				return err
			}
			opts.TLSConfig = &tls.Config{
				RootCAs:            certPool,
				InsecureSkipVerify: workerSettings.SkipTLSVerify,
			}
		}

		client = redis.NewClient(opts).WithContext(ctx)

		// Fail fast if Redis is unreachable. NOTE: on failure, logger stays
		// set but initialized remains false, so a retry re-runs every step.
		err = client.Ping().Err()
		if err != nil {
			return err
		}

		workerSettings.closed = make(chan struct{})

		initialized = true
	}
	return nil
}
// getCertPool builds the root CA pool used for TLS connections to Redis.
// It starts from the system certificate pool when available (falling back
// to an empty pool) and appends the PEM certificate(s) found at
// workerSettings.TLSCertPath. It returns an error if the file cannot be
// read or contains no parseable certificates.
func getCertPool() (*x509.CertPool, error) {
	// SystemCertPool may fail on some platforms; ignore the error and fall
	// back to an empty pool so the custom certificate still loads.
	rootCAs, _ := x509.SystemCertPool()
	if rootCAs == nil {
		rootCAs = x509.NewCertPool()
	}
	certs, err := ioutil.ReadFile(workerSettings.TLSCertPath)
	if err != nil {
		return nil, fmt.Errorf("failed to read %q for the RootCA pool: %v", workerSettings.TLSCertPath, err)
	}
	// BUG FIX: the original message formatted err here, but err is always
	// nil on this path (ReadFile succeeded), so it printed "<nil>". State
	// the actual reason instead.
	if ok := rootCAs.AppendCertsFromPEM(certs); !ok {
		return nil, fmt.Errorf("failed to append %q to the RootCA pool: no certificates could be parsed", workerSettings.TLSCertPath)
	}
	return rootCAs, nil
}
// Close cleans up resources initialized by goworker. This
// will be called by Work when cleaning up. However, if you
// are using the Init function to access goworker functions
// and configuration without processing jobs by calling
// Work, you should run this function when cleaning up.
func Close() error {
	initMutex.Lock()
	defer initMutex.Unlock()

	// Nothing to do when Init never ran (or Close already ran).
	if !initialized {
		return nil
	}
	if err := client.Close(); err != nil {
		return err
	}
	initialized = false
	close(workerSettings.closed)
	return nil
}
// Closed will return a channel that will be
// closed once the full process is done closing
// and cleaning all the workers
//
// NOTE(review): workerSettings.closed is created in Init; calling Closed
// before Init returns a nil channel, which blocks forever on receive.
func Closed() <-chan struct{} {
	return workerSettings.closed
}
// Work starts the goworker process. Check for errors in
// the return value. Work will take over the Go executable
// and will run until a QUIT, INT, or TERM signal is
// received, or until the queues are empty if the
// -exit-on-complete flag is set.
func Work() error {
	err := Init()
	if err != nil {
		return err
	}
	defer Close()

	quit := signals()

	poller, err := newPoller(workerSettings.Queues, workerSettings.IsStrict)
	if err != nil {
		return err
	}
	jobs, err := poller.poll(time.Duration(workerSettings.Interval), quit)
	if err != nil {
		return err
	}

	var monitor sync.WaitGroup
	var wk *worker

	for id := 0; id < workerSettings.Concurrency; id++ {
		worker, err := newWorker(strconv.Itoa(id), workerSettings.Queues)
		if err != nil {
			return err
		}
		// Keep a handle on the first worker to prune dead ones later.
		if wk == nil {
			wk = worker
		}
		worker.work(jobs, &monitor)
	}

	// Once all the workers have started we prune the dead ones
	// this way we prevent from pruning workers that have just
	// started and not registered to the Heartbeat in case
	// of ForcePrune is enabled.
	//
	// BUG FIX: with Concurrency == 0 the loop above never runs and wk
	// stays nil; guard so we don't invoke a method on a nil *worker.
	if wk != nil {
		wk.pruneDeadWorkers(client)
	}

	if hasToCleanRetries() {
		cleanExpiredRetryTicker := time.NewTicker(cleaningExpiredRetriesInterval)
		defer cleanExpiredRetryTicker.Stop()

		// Signal when all workers have finished.
		waitChan := make(chan struct{})
		go func() {
			monitor.Wait()
			close(waitChan)
		}()

		for {
			select {
			case <-cleanExpiredRetryTicker.C:
				cleanExpiredRetries()
			case <-waitChan:
				return nil
			}
		}
	}

	monitor.Wait()
	return nil
}
// hasToCleanRetries reports whether a maximum retry age has been
// configured, which enables the periodic expired-retries cleanup in Work.
func hasToCleanRetries() bool {
	const disabled = time.Duration(0)
	return workerSettings.MaxAgeRetries != disabled
}
// cleanExpiredRetries removes entries from the namespaced "failed" list
// whose last retry happened longer than workerSettings.MaxAgeRetries ago.
// Errors are logged and abort the pass; the next tick retries.
func cleanExpiredRetries() {
	// This is used to set a lock so this operation is not done by more than 1 worker at the same time.
	// The TTL is half the cleaning interval so the lock expires before the next pass.
	ok, err := client.SetNX(fmt.Sprintf("%s%s", workerSettings.Namespace, keyForCleaningExpiredRetries), os.Getpid(), cleaningExpiredRetriesInterval/2).Result()
	if err != nil {
		logger.Criticalf("Error on setting lock to clean retries: %v", err)
		return
	}
	if !ok {
		// Another worker holds the lock.
		return
	}

	failedQueue := fmt.Sprintf("%sfailed", workerSettings.Namespace)
	failures, err := client.LRange(failedQueue, 0, -1).Result()
	if err != nil {
		logger.Criticalf("Error on getting list of all failed jobs: %v", err)
		return
	}

	// BUG FIX: iterate from the tail so removing an element does not shift
	// the indexes of entries not yet visited. The original forward loop made
	// every LSet after the first removal target the wrong list element.
	for i := len(failures) - 1; i >= 0; i-- {
		fail := failures[i]
		var f failure
		err = json.Unmarshal([]byte(fail), &f)
		if err != nil {
			logger.Criticalf("Error on unmarshaling failure: %v", err)
			return
		}
		ra, err := f.GetRetriedAtTime()
		if err != nil {
			logger.Criticalf("Error on GetRetriedAtTime of failure job %q: %v", fail, err)
			return
		}
		// Never retried; nothing to expire.
		if ra.IsZero() {
			continue
		}
		// If the RetryAt has exceeded the MaxAgeRetries then we'll
		// remove the job from the list of failed jobs.
		if ra.Add(workerSettings.MaxAgeRetries).Before(time.Now()) {
			// BUG FIX: the original placeholder was "", which is not unique;
			// LRem could then delete a different (also blanked) entry. Use a
			// per-process, per-index marker so LRem removes exactly the
			// element we just overwrote with LSet.
			placeholder := fmt.Sprintf("goworker:expired:%d:%d", os.Getpid(), i)
			client.LSet(failedQueue, int64(i), placeholder)
			client.LRem(failedQueue, 1, placeholder)
		}
	}
}