-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* initial bq_load worker * add truncate, append and delete options * add app descriptions * update go modules * add unit tests * add lazy column names in sql-read
- Loading branch information
1 parent
460c8a4
commit 51c4fb1
Showing
8 changed files
with
515 additions
and
94 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
package main | ||
|
||
import ( | ||
"errors" | ||
"strings" | ||
|
||
tools "github.com/pcelvng/task-tools" | ||
"github.com/pcelvng/task-tools/bootstrap" | ||
) | ||
|
||
const (
	// taskType is the task type name handed to bootstrap.NewWorkerApp.
	taskType = "bq_load"

	// desc is the help text handed to the app's Description. The parameter
	// names listed here must match the uri tags on the worker struct, and the
	// insert rules must match the validation done in NewWorker.
	desc = `load a delimited json file into BigQuery
info params
- origin: (required) file to be loaded (gs://path/file.json)
- dest_table: (required) project.dataset.table to insert into
- append: append the data to the table
- truncate: truncate the table (delete ALL and insert)
- delete: map field defines the column and values to delete before inserting (delete=id:10|date:2020-01-02)
one insert rule (append|truncate|delete) is required, truncate and delete cannot be combined
example
{"task":"bq_load", "info":"gs://my/data.json?dest_table=project.reports.impressions&delete=date:2020-01-02|id:11"}`
)
|
||
// options is the application level configuration for the bq_load worker.
// It is handed to bootstrap.NewWorkerApp and copied into every worker
// created by NewWorker.
type options struct {
	// BqAuth is the file path to the service file. When it is non-empty,
	// DoTask passes it to option.WithCredentialsFile.
	BqAuth string `toml:"bq_auth" comment:"file path to service file"`
}

// Validate has nothing to check for these options and always returns nil.
func (*options) Validate() error {
	return nil
}
|
||
func main() { | ||
opts := &options{} | ||
app := bootstrap.NewWorkerApp(taskType, opts.NewWorker, opts). | ||
Description(desc). | ||
Version(tools.Version).Initialize() | ||
|
||
app.Run() | ||
} | ||
|
||
// Destination identifies a BigQuery table by its project, dataset and table
// names.
type Destination struct {
	Project string
	Dataset string
	Table   string
}

// UnmarshalText parses text of the form "project.dataset.table" into d.
// It returns an error, leaving d untouched, unless text consists of exactly
// three non-empty parts separated by dots.
func (d *Destination) UnmarshalText(text []byte) error {
	parts := strings.Split(string(text), ".")
	if len(parts) != 3 {
		return errors.New("requires (project.dataset.table)")
	}
	for _, part := range parts {
		if part == "" {
			return errors.New("requires (project.dataset.table)")
		}
	}

	d.Project = parts[0]
	d.Dataset = parts[1]
	d.Table = parts[2]
	return nil
}

// String returns the destination as "project.dataset.table".
func (d Destination) String() string {
	return strings.Join([]string{d.Project, d.Dataset, d.Table}, ".")
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,119 @@ | ||
package main | ||
|
||
import ( | ||
"context" | ||
"fmt" | ||
"sort" | ||
"strconv" | ||
"strings" | ||
|
||
"cloud.google.com/go/bigquery" | ||
"github.com/dustin/go-humanize" | ||
"github.com/jbsmith7741/uri" | ||
"github.com/pcelvng/task" | ||
"google.golang.org/api/option" | ||
) | ||
|
||
type worker struct { | ||
task.Meta | ||
options | ||
|
||
Destination `uri:"dest_table" required:"true"` | ||
File string `uri:"origin" required:"true"` | ||
Truncate bool `uri:"truncate"` | ||
Append bool `uri:"append"` | ||
DeleteMap map[string]string `uri:"delete"` // will replace the data by removing current data | ||
delete bool | ||
} | ||
|
||
func (o *options) NewWorker(info string) task.Worker { | ||
w := &worker{ | ||
Meta: task.NewMeta(), | ||
options: *o, | ||
} | ||
err := uri.Unmarshal(info, w) | ||
if err != nil { | ||
return task.InvalidWorker(err.Error()) | ||
} | ||
|
||
// verify options | ||
w.delete = len(w.DeleteMap) > 0 | ||
if w.delete && w.Truncate { | ||
return task.InvalidWorker("truncate and delete options must be selected independently") | ||
} | ||
|
||
if !(w.delete || w.Truncate || w.Append) { | ||
return task.InvalidWorker("insert rule required (append|truncate|delete)") | ||
} | ||
if w.delete { | ||
w.Append = true | ||
} | ||
|
||
return w | ||
} | ||
|
||
func (w *worker) DoTask(ctx context.Context) (task.Result, string) { | ||
opts := make([]option.ClientOption, 0) | ||
if w.BqAuth != "" { | ||
opts = append(opts, option.WithCredentialsFile(w.BqAuth)) | ||
} | ||
client, err := bigquery.NewClient(ctx, w.Project, opts...) | ||
if err != nil { | ||
return task.Failf("bigquery client init %s", err) | ||
} | ||
|
||
bqRef := bigquery.NewGCSReference(w.File) | ||
bqRef.SourceFormat = bigquery.JSON | ||
bqRef.MaxBadRecords = 1 | ||
|
||
loader := client.Dataset(w.Dataset).Table(w.Table).LoaderFrom(bqRef) | ||
loader.WriteDisposition = bigquery.WriteAppend | ||
if len(w.DeleteMap) > 0 { | ||
q := delStatement(w.DeleteMap, w.Destination) | ||
j, err := client.Query(q).Run(ctx) | ||
if err != nil { | ||
return task.Failf("delete statement: %s", err) | ||
} | ||
status, err := j.Wait(ctx) | ||
if err != nil { | ||
return task.Failf("delete wait: %s", err) | ||
} | ||
if status.Err() != nil { | ||
return task.Failf("delete: %s", err) | ||
} | ||
status = j.LastStatus() | ||
if qSts, ok := status.Statistics.Details.(*bigquery.QueryStatistics); ok { | ||
w.SetMeta("rows_del", strconv.FormatInt(qSts.NumDMLAffectedRows, 10)) | ||
} | ||
} | ||
|
||
if w.Truncate { | ||
loader.WriteDisposition = bigquery.WriteTruncate | ||
} | ||
|
||
job, err := loader.Run(ctx) | ||
if err != nil { | ||
return task.Failf("loader run: %s", err) | ||
} | ||
status, err := job.Wait(ctx) | ||
if err == nil { | ||
if status.Err() != nil { | ||
return task.Failf("job completed with error: %v", status.Err()) | ||
} | ||
if sts, ok := status.Statistics.Details.(*bigquery.LoadStatistics); ok { | ||
w.SetMeta("rows_insert", strconv.FormatInt(sts.OutputRows, 10)) | ||
return task.Completed("%d rows (%s) loaded", sts.OutputRows, humanize.Bytes(uint64(sts.OutputBytes))) | ||
} | ||
} | ||
|
||
return task.Completed("completed") | ||
} | ||
|
||
func delStatement(m map[string]string, d Destination) string { | ||
s := make([]string, 0) | ||
for k, v := range m { | ||
s = append(s, k+" = "+v) | ||
} | ||
sort.Sort(sort.StringSlice(s)) | ||
return fmt.Sprintf("delete from `%s.%s.%s` where %s", d.Project, d.Dataset, d.Table, strings.Join(s, " and ")) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
package main | ||
|
||
import ( | ||
"errors" | ||
"testing" | ||
|
||
"github.com/hydronica/trial" | ||
"github.com/pcelvng/task" | ||
) | ||
|
||
func TestNewWorker(t *testing.T) { | ||
fn := func(i trial.Input) (interface{}, error) { | ||
o := &options{} | ||
w := o.NewWorker(i.String()) | ||
if invalid, s := task.IsInvalidWorker(w); invalid { | ||
return nil, errors.New(s) | ||
} | ||
|
||
return w, nil | ||
} | ||
cases := trial.Cases{ | ||
"required fields": { | ||
Input: "", | ||
ExpectedErr: errors.New("origin is required"), | ||
}, | ||
"invalid destination": { | ||
Input: "gs://file.json?dest_table=apple", | ||
ExpectedErr: errors.New("requires (project.dataset.table)"), | ||
}, | ||
"missing insert rule": { | ||
Input: "gs://file.json?dest_table=p.d.t", | ||
ExpectedErr: errors.New("insert rule required"), | ||
}, | ||
"append": { | ||
Input: "gs://file.json?dest_table=p.d.t&append", | ||
Expected: &worker{ | ||
Meta: task.NewMeta(), | ||
File: "gs://file.json", | ||
Destination: Destination{"p", "d", "t"}, | ||
Append: true, | ||
}, | ||
}, | ||
"truncate": { | ||
Input: "gs://file.json?dest_table=p.d.t&truncate", | ||
Expected: &worker{ | ||
Meta: task.NewMeta(), | ||
File: "gs://file.json", | ||
Destination: Destination{"p", "d", "t"}, | ||
Truncate: true, | ||
}, | ||
}, | ||
"delete": { | ||
Input: "gs://file.json?dest_table=p.d.t&delete=id:10", | ||
Expected: &worker{ | ||
Meta: task.NewMeta(), | ||
File: "gs://file.json", | ||
Destination: Destination{"p", "d", "t"}, | ||
delete: true, | ||
Append: true, | ||
DeleteMap: map[string]string{"id": "10"}, | ||
}, | ||
}, | ||
"invalid delete": { | ||
Input: "gs://file.json?dest_table=p.d.t&delete=id:10&truncate", | ||
ExpectedErr: errors.New("truncate and delete"), | ||
}, | ||
} | ||
trial.New(fn, cases).Test(t) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.