diff --git a/README.md b/README.md index 38da7e5..e292077 100644 --- a/README.md +++ b/README.md @@ -243,7 +243,8 @@ Default configuration file path: `/nxs-data-anonymizer.conf`. The file is repres | `logfile` | String | No | `stderr` | Log file path. You may also use `stdout` and `stderr` | | `loglevel` | String | No | `info` | Log level. Available values: `debug`, `warn`, `error` and `info` | | `progress` | [Progress](#progress-settings) | No | - | Anonymization progress logging | -| `filters` | Map of [Filters](#filters-settings) | No | - | Filters set for specified tables (key as a table name). Note: for PgSQL you also need to specify a scheme (e.g. `public.tablename`) | +| `variables` | Map of [Variables](#variables-settings) (key: variable name) | No | - | Global variables to be used in filters. Variables are set once at application initialization and remain unchanged during runtime | +| `filters` | Map of [Filters](#filters-settings) (key: table name) | No | - | Filters set for specified tables (key as a table name). Note: for PgSQL you also need to specify a schema (e.g. `public.tablename`) | | `security` | [Security](#security-settings) | No | - | Security enforcement for anonymizer | @@ -254,13 +255,20 @@ Default configuration file path: `/nxs-data-anonymizer.conf`. The file is repres | `rhythm` | String | No | `0s` | Frequency write into the log a read bytes count. Progress will be written to the log only when this option is specified and has none-zero value. You may use a human-readable values (e.g. `30s`, `5m`, etc) | | `humanize` | Bool | No | `false` | Set this option to `true` if you need to write into the log a read bytes count in a human-readable format. 
On `false` raw bytes count will be written to the log | +##### Variables settings + +| Option | Type | Required | Default value | Description | +|--- | :---: | :---: | :---: |--- | +| `type` | String | No | `template` | Type of the `value` field. Available values: `template` and `command` | +| `value` | String | Yes | - | The value of the global variable to be used within the filters. Depending on `type`, this is either a `Go template` or a `command`. See below for details | + ##### Filters settings Filters description for specified table. | Option | Type | Required | Default value | Description | |--- | :---: | :---: | :---: |--- | -| `columns` | Map of [Columns](#columns-settings) | No | - | Filter rules for specified columns of table (key as a column name) | +| `columns` | Map of [Columns](#columns-settings) (key: column name) | No | - | Filter rules for specified columns of table (key as a column name) | ###### Columns settings @@ -272,12 +280,16 @@ Filters description for specified table. **Go template** -To anonymize a database fields you may use a Go template with the [Sprig template library's](https://masterminds.github.io/sprig/) functions. You may also use values of other columns in the rules for same row (with values before substitutions). +To anonymize database fields you may use a Go template with the [Sprig template library's](https://masterminds.github.io/sprig/) functions. Additional filter functions: - `null`: set a field value to `NULL` - `isNull`: compare a field value with `NULL` +You may also use the following data in templates: +- Values of other columns in the rules for the same row (with values before substitutions). Statement: `{{ .Values.COLUMN_NAME }}` (e.g.: `{{ .Values.username }}`) +- Global variables. Statement: `{{ .Variables.VARIABLE_NAME }}` (e.g.: `{{ .Variables.password }}`) + **Command** To anonymize a database fields you may use a commands (scripts or binaries) with any logic you need. 
The command's concept has following properties: @@ -286,6 +298,7 @@ To anonymize a database fields you may use a commands (scripts or binaries) with - Environment variables with the row data are available within the command: - `ENVVARTABLE`: contains a name of the filtered table - `ENVVARCURCOLUMN`: contains the current column name + - `ENVVARGLOBAL_{VARIABLE_NAME}`: contains value for specified global variable - `ENVVARCOLUMN_{COLUMN_NAME}`: contains values (before substitutions) for all columns for the current row ##### Security settings @@ -400,6 +413,13 @@ You need to get a dump with fake values: In accordance with these conditions, the nxs-data-anonymizer config may look like this: ```yaml +variables: + adminPassword: + type: template + value: "preset_admin_password" + adminAPIKey: + value: "preset_admin_api_key" + filters: public.users: columns: @@ -410,7 +430,7 @@ filters: value: /path/to/script.sh unique: true api_key: - value: "{{ if eq .Values.username \"admin\" }}preset_admin_api_key{{ else }}{{- randAlphaNum 50 | nospace | lower -}}{{ end }}" + value: "{{ if eq .Values.username \"admin\" }}{{ .Variables.adminAPIKey }}{{ else }}{{- randAlphaNum 50 | nospace | lower -}}{{ end }}" unique: true ``` @@ -422,7 +442,7 @@ The `/path/to/script.sh` script content is following: # Print preset password if current user is admin if [ "$ENVVARCOLUMN_username" == "admin" ]; then - echo -n "preset_admin_password" + echo -n "$ENVVARGLOBAL_adminPassword" exit 0 fi diff --git a/ctx/conf.go b/ctx/conf.go index a77ba06..8235ac8 100644 --- a/ctx/conf.go +++ b/ctx/conf.go @@ -11,9 +11,10 @@ type confOpts struct { LogFile string `conf:"logfile" conf_extraopts:"default=stderr"` LogLevel string `conf:"loglevel" conf_extraopts:"default=info"` - Progress progressConf `conf:"progress"` - Filters map[string]filterConf `conf:"filters"` - Security securityConf `conf:"security"` + Progress progressConf `conf:"progress"` + Filters map[string]filterConf `conf:"filters"` + Security 
securityConf `conf:"security"` + Variables map[string]variableFilterConf `conf:"variables"` MySQL *mysqlConf `conf:"mysql"` } @@ -33,6 +34,11 @@ type columnFilterConf struct { Unique bool `conf:"unique"` } +type variableFilterConf struct { + Type string `conf:"type" conf_extraopts:"default=template"` + Value string `conf:"value" conf_extraopts:"required"` +} + type securityConf struct { Policy securityPolicyConf `conf:"policy"` Exceptions securityExceptionsConf `conf:"exceptions"` @@ -83,11 +89,17 @@ func confRead(confPath string) (confOpts, error) { for _, f := range c.Filters { for _, cf := range f.Columns { if misc.ValueTypeFromString(cf.Type) == misc.ValueTypeUnknown { - return c, fmt.Errorf("conf read: unknown filter type") + return c, fmt.Errorf("conf read: unknown column filter type") } } } + for _, f := range c.Variables { + if misc.ValueTypeFromString(f.Type) == misc.ValueTypeUnknown { + return c, fmt.Errorf("conf read: unknown variable filter type") + } + } + if misc.SecurityPolicyTablesTypeFromString(c.Security.Policy.Tables) == misc.SecurityPolicyTablesUnknown { return c, fmt.Errorf("conf read: unknown security policy tables type") } diff --git a/ctx/context.go b/ctx/context.go index 02dbaec..3890ef9 100644 --- a/ctx/context.go +++ b/ctx/context.go @@ -139,6 +139,17 @@ func AppCtxInit() (any, error) { c.PR = progressreader.Init(ir) + vr := func() map[string]relfilter.VariableRuleOpts { + rules := make(map[string]relfilter.VariableRuleOpts) + for n, f := range conf.Variables { + rules[n] = relfilter.VariableRuleOpts{ + Type: misc.ValueType(f.Type), + Value: f.Value, + } + } + return rules + }() + tr := func() map[string]map[string]relfilter.ColumnRuleOpts { tables := make(map[string]map[string]relfilter.ColumnRuleOpts) for t, cs := range conf.Filters { @@ -190,6 +201,7 @@ func AppCtxInit() (any, error) { c.Anonymizer, err = mysql_anonymize.Init( c.PR, mysql_anonymize.InitOpts{ + Variables: vr, Security: mysql_anonymize.SecurityOpts{ TablesPolicy: 
misc.SecurityPolicyTablesType(conf.Security.Policy.Tables), ColumnsPolicy: misc.SecurityPolicyColumnsTypeFromString(conf.Security.Policy.Columns), @@ -213,6 +225,7 @@ func AppCtxInit() (any, error) { c.Anonymizer, err = pgsql_anonymize.Init( c.PR, pgsql_anonymize.InitOpts{ + Variables: vr, Security: pgsql_anonymize.SecurityOpts{ TablesPolicy: misc.SecurityPolicyTablesType(conf.Security.Policy.Tables), ColumnsPolicy: misc.SecurityPolicyColumnsTypeFromString(conf.Security.Policy.Columns), diff --git a/misc/template.go b/misc/template.go index 1067fa1..e919553 100644 --- a/misc/template.go +++ b/misc/template.go @@ -12,29 +12,39 @@ var null = "::NULL::" type TemplateData struct { TableName string Values map[string][]byte + Variables map[string]string } // TemplateExec makes message from given template `tpl` and data `d` -func TemplateExec(tpl string, d TemplateData) ([]byte, error) { - - var b bytes.Buffer +func TemplateExec(tpl string, d *TemplateData) ([]byte, error) { type tplData struct { TableName string Values map[string]string + Variables map[string]string } - td := tplData{ - TableName: d.TableName, - Values: make(map[string]string), - } + var ( + b bytes.Buffer + td *tplData + ) + + if d != nil { + td = &tplData{ + TableName: d.TableName, + Values: make(map[string]string), + Variables: make(map[string]string), + } - for k, v := range d.Values { - if v == nil { - td.Values[k] = null - } else { - td.Values[k] = string(v) + for k, v := range d.Values { + if v == nil { + td.Values[k] = null + } else { + td.Values[k] = string(v) + } } + + td.Variables = d.Variables } // See http://masterminds.github.io/sprig/ for details diff --git a/modules/anonymizers/mysql/mysql.go b/modules/anonymizers/mysql/mysql.go index d9ee9a5..ebddbea 100644 --- a/modules/anonymizers/mysql/mysql.go +++ b/modules/anonymizers/mysql/mysql.go @@ -17,8 +17,9 @@ type MySQL struct { } type InitOpts struct { - Security SecurityOpts - Rules RulesOpts + Variables 
map[string]relfilter.VariableRuleOpts + Security SecurityOpts + Rules RulesOpts } type RulesOpts struct { @@ -116,6 +117,7 @@ func userCtxInit(s InitOpts) (*userCtx, error) { f, err := relfilter.Init( relfilter.InitOpts{ + Variables: s.Variables, TableRules: s.Rules.TableRules, DefaultRules: s.Rules.DefaultRules, ExceptionColumns: s.Rules.ExceptionColumns, diff --git a/modules/anonymizers/pgsql/pgsql.go b/modules/anonymizers/pgsql/pgsql.go index 88da164..25f4217 100644 --- a/modules/anonymizers/pgsql/pgsql.go +++ b/modules/anonymizers/pgsql/pgsql.go @@ -17,8 +17,9 @@ type PgSQL struct { } type InitOpts struct { - Security SecurityOpts - Rules RulesOpts + Variables map[string]relfilter.VariableRuleOpts + Security SecurityOpts + Rules RulesOpts } type RulesOpts struct { @@ -60,6 +61,7 @@ func userCtxInit(s InitOpts) (*userCtx, error) { f, err := relfilter.Init( relfilter.InitOpts{ + Variables: s.Variables, TableRules: s.Rules.TableRules, DefaultRules: s.Rules.DefaultRules, ExceptionColumns: s.Rules.ExceptionColumns, diff --git a/modules/filters/relfilter/filter.go b/modules/filters/relfilter/filter.go index 9f0e7c1..2779eeb 100644 --- a/modules/filters/relfilter/filter.go +++ b/modules/filters/relfilter/filter.go @@ -10,6 +10,8 @@ import ( ) type InitOpts struct { + Variables map[string]VariableRuleOpts + TableRules map[string]map[string]ColumnRuleOpts DefaultRules map[string]ColumnRuleOpts ExceptionColumns []string @@ -29,6 +31,11 @@ type ColumnRuleOpts struct { Unique bool } +type VariableRuleOpts struct { + Type misc.ValueType + Value string +} + type Filter struct { // Rules for filter a table values @@ -43,6 +50,8 @@ type Row struct { } type rules struct { + variables map[string]string + tableRules map[string]map[string]ColumnRuleOpts defaultRules map[string]ColumnRuleOpts exceptionColumns map[string]any @@ -70,6 +79,7 @@ type rowValue struct { const uniqueAttempts = 5 const ( + envVarGlobalPrefix = "ENVVARGLOBAL_" envVarTable = "ENVVARTABLE" envVarColumnPrefix 
= "ENVVARCOLUMN_" envVarCurColumn = "ENVVARCURCOLUMN" @@ -126,8 +136,18 @@ func Init(opts InitOpts) (*Filter, error) { excpts[e] = nil } + vars := make(map[string]string) + for n, f := range opts.Variables { + v, err := makeVariable(f) + if err != nil { + return nil, fmt.Errorf("filter init: %w", err) + } + vars[n] = v + } + return &Filter{ rules: rules{ + variables: vars, tableRules: opts.TableRules, defaultRules: opts.DefaultRules, exceptionColumns: excpts, @@ -298,12 +318,20 @@ func (filter *Filter) applyRules(tname string, rls []applyRule) error { td := misc.TemplateData{ TableName: tname, Values: make(map[string][]byte), + Variables: filter.rules.variables, } tdenv := []string{ fmt.Sprintf("%s=%s", envVarTable, tname), } + for n, v := range filter.rules.variables { + tdenv = append( + tdenv, + fmt.Sprintf("%s%s=%s", envVarGlobalPrefix, n, v), + ) + } + for i, c := range filter.tableData.columns.cc { td.Values[c.n] = filter.tableData.values[i].V @@ -324,7 +352,7 @@ func (filter *Filter) applyRules(tname string, rls []applyRule) error { fmt.Sprintf("%s=%s", envVarCurColumn, r.c.n), ) - v, err := filter.applyFilter(r.c.n, r.cr, td, tde) + v, err := filter.applyColumnFilter(r.c.n, r.cr, td, tde) if err != nil { return fmt.Errorf("rules: %w", err) } @@ -336,7 +364,7 @@ func (filter *Filter) applyRules(tname string, rls []applyRule) error { return nil } -func (filter *Filter) applyFilter(cn string, cr ColumnRuleOpts, td misc.TemplateData, tde []string) ([]byte, error) { +func (filter *Filter) applyColumnFilter(cn string, cr ColumnRuleOpts, td misc.TemplateData, tde []string) ([]byte, error) { for i := 0; i < uniqueAttempts; i++ { @@ -349,7 +377,7 @@ func (filter *Filter) applyFilter(cn string, cr ColumnRuleOpts, td misc.Template case misc.ValueTypeTemplate: v, err = misc.TemplateExec( cr.Value, - td, + &td, ) if err != nil { return []byte{}, fmt.Errorf("filter: value compile template: %w", err) @@ -422,3 +450,47 @@ func bcopy(b []byte) []byte { return d } + +func 
makeVariable(cr VariableRuleOpts) (string, error) { + + var ( + v []byte + err error + ) + + switch cr.Type { + case misc.ValueTypeTemplate: + v, err = misc.TemplateExec( + cr.Value, + nil, + ) + if err != nil { + return "", fmt.Errorf("variable: value compile template: %w", err) + } + case misc.ValueTypeCommand: + + var stderr, stdout bytes.Buffer + + cmd := exec.Command(cr.Value) + + cmd.Stdout = &stdout + cmd.Stderr = &stderr + + if err := cmd.Run(); err != nil { + + e, b := err.(*exec.ExitError) + if b == false { + return "", fmt.Errorf("variable: value exec command: %w", err) + } + + return "", fmt.Errorf("variable: value exec command: bad exit code %d: %s", e.ExitCode(), stderr.String()) + } + + v = stdout.Bytes() + + default: + return "", fmt.Errorf("variable: value compile: unknown type") + } + + return string(bytes.ReplaceAll(v, []byte("\n"), []byte("\\n"))), nil +}