diff --git a/README.md b/README.md index 92e7098..38da7e5 100644 --- a/README.md +++ b/README.md @@ -270,6 +270,24 @@ Filters description for specified table. | `value` | String | Yes | - | The value to be used to replace at every cell in specified column. In accordance with the `type` this value may be either `Go template` or `command`. See below for details| | `unique` | Bool | No | `false` | If true checks the generated value for cell is unique whole the column | +**Go template** + +To anonymize database fields you may use a Go template with the [Sprig template library's](https://masterminds.github.io/sprig/) functions. You may also use values of other columns of the same row in the rules (with values before substitutions). + +Additional filter functions: +- `null`: set a field value to `NULL` +- `isNull`: compare a field value with `NULL` + +**Command** + +To anonymize database fields you may use commands (scripts or binaries) with any logic you need. The command concept has the following properties: +- The command's `stdout` will be used as a new value for the anonymized field +- The command must return a zero exit code, otherwise nxs-data-anonymizer will fail with an error (in this case `stderr` will be used as the error text) +- Environment variables with the row data are available within the command: + - `ENVVARTABLE`: contains the name of the filtered table + - `ENVVARCURCOLUMN`: contains the current column name + - `ENVVARCOLUMN_{COLUMN_NAME}`: contains values (before substitutions) for all columns for the current row + ##### Security settings | Option | Type | Required | Default value | Description | @@ -283,7 +301,7 @@ Filters description for specified table. | Option | Type | Required | Default value | Description | |--- | :---: | :---: | :---: |--- | | `tables` | String | No | `pass` | Security policy for tables.
If value `skip` is used all undescribed tables in config will be skipped while anonymization | -| `columns` | String | No | `pass` | Security policy for columns. If value `randomize` is used all undescribed columns in config will be randomized (with respect to types) while anonymization | +| `columns` | String | No | `pass` | Security policy for columns. If value `randomize` is used all undescribed columns in config will be randomized (with default rules in accordance with the types) during anonymization | _Values to masquerade a columns in accordance with the types see below._ @@ -356,25 +374,14 @@ _Values to masquerade a columns in accordance with the types see below._ | Option | Type | Required | Default value | Description | |--- | :---: | :---: | :---: |--- | | `columns` | Map of Filters | No | - | Default filter for columns (in any table). That filters will be applied for columns with this names without described filters | +| `types` | Slice of [Types](#types-settings) | No | - | Custom filters for types (in any table). With these filter rules you may override the default filters for types | +###### Types settings -**Go template** - -To anonymize a database fields you may use a Go template with the [Sprig template library's](https://masterminds.github.io/sprig/) functions. You may also use values of other columns in the rules for same row (with values before substitutions). - -Additional filter functions: -- `null`: set a field value to `NULL` -- `isNull`: compare a field value with `NULL` - -**Command** - -To anonymize a database fields you may use a commands (scripts or binaries) with any logic you need.
The command's concept has following properties: -- The command's `stdout` will be used as a new value for the anonymized field -- Command must return zero exit code, otherwise nxs-data-anonymizer will falls with error (in this case `stderr` will be used as an error text) -- Environment variables with the row data are available within the command: - - `ENVVARTABLE`: contains a name of the filtered table - - `ENVVARCURCOLUMN`: contains the current column name - - `ENVVARCOLUMN_{COLUMN_NAME}`: contains values (before substitutions) for all columns for the current row +| Option | Type | Required | Default value | Description | +|--- | :---: | :---: | :---: |--- | +| `regex` | String | Yes | - | Regular expression. It is matched against the column data type (as written in the `CREATE TABLE` section) | +| `rule` | [Columns](#columns-settings) | Yes | - | Rule that will be applied to columns whose data type matches the specified regular expression | #### Example diff --git a/ctx/conf.go b/ctx/conf.go index 1d03e47..a77ba06 100644 --- a/ctx/conf.go +++ b/ctx/conf.go @@ -36,7 +36,7 @@ type columnFilterConf struct { type securityConf struct { Policy securityPolicyConf `conf:"policy"` Exceptions securityExceptionsConf `conf:"exceptions"` - Defaults filterConf `conf:"defaults"` + Defaults securityDefaultsConf `conf:"defaults"` } type securityPolicyConf struct { @@ -49,6 +49,16 @@ type securityExceptionsConf struct { Columns []string `conf:"columns"` } +type securityDefaultsConf struct { + Columns map[string]columnFilterConf `conf:"columns"` + Types []securityDefaultsTypeConf `conf:"types"` +} + +type securityDefaultsTypeConf struct { + Regex string `conf:"regex" conf_extraopts:"required"` + Rule columnFilterConf `conf:"rule" conf_extraopts:"required"` +} + type mysqlConf struct { Host string `conf:"host" conf_extraopts:"required"` Port int `conf:"port" conf_extraopts:"required"` diff --git a/ctx/context.go b/ctx/context.go index 34537d1..02dbaec 100644 --- a/ctx/context.go +++ b/ctx/context.go
@@ -6,8 +6,10 @@ import ( "os" "time" + "github.com/nixys/nxs-data-anonymizer/interfaces" mysql_anonymize "github.com/nixys/nxs-data-anonymizer/modules/anonymizers/mysql" pgsql_anonymize "github.com/nixys/nxs-data-anonymizer/modules/anonymizers/pgsql" + progressreader "github.com/nixys/nxs-data-anonymizer/modules/progress_reader" "github.com/nixys/nxs-data-anonymizer/ds/mysql" "github.com/nixys/nxs-data-anonymizer/misc" @@ -19,13 +21,12 @@ import ( // Ctx defines application custom context type Ctx struct { - Log *logrus.Logger - Input io.Reader - Output io.Writer - Rules relfilter.Rules - Progress progressCtx - Security SecurityCtx - DB DBCtx + Log *logrus.Logger + Output io.Writer + Progress progressCtx + DB DBCtx + Anonymizer interfaces.Anonymizer + PR *progressreader.ProgressReader } type DBCtx struct { @@ -61,6 +62,8 @@ type SecurityCtx struct { // Init initiates application custom context func AppCtxInit() (any, error) { + var ir io.Reader + c := &Ctx{} args, err := ArgsRead() @@ -81,9 +84,9 @@ func AppCtxInit() (any, error) { } if args.Input == nil { - c.Input = os.Stdin + ir = os.Stdin } else { - c.Input, err = os.Open(*args.Input) + ir, err = os.Open(*args.Input) if err != nil { c.Log.WithFields(logrus.Fields{ "details": err, @@ -109,7 +112,7 @@ func AppCtxInit() (any, error) { Type: args.DBType, } - // Connect to MySQL if necessary + // DEPRECATED: Connect to MySQL if necessary if conf.MySQL != nil { m, err := mysql.Connect(mysql.Settings{ Host: conf.MySQL.Host, @@ -134,56 +137,103 @@ func AppCtxInit() (any, error) { } } - c.Rules.Tables = make(map[string]relfilter.TableRules) - - if misc.SecurityPolicyColumnsTypeFromString(conf.Security.Policy.Columns) == misc.SecurityPolicyColumnsRandomize { - switch args.DBType { - case DBTypeMySQL: - c.Rules.RandomizeTypes = mysql_anonymize.RandomizeTypesDefault - case DBTypePgSQL: - c.Rules.RandomizeTypes = pgsql_anonymize.RandomizeTypesDefault - } - } - - for t, f := range conf.Filters { - - c.Rules.Tables[t] = 
relfilter.TableRules{ - Columns: func() map[string]relfilter.ColumnRule { - cc := make(map[string]relfilter.ColumnRule) - for c, cf := range f.Columns { - cc[c] = relfilter.ColumnRule{ - Type: misc.ValueTypeFromString(cf.Type), - Value: cf.Value, - Unique: cf.Unique, - } + c.PR = progressreader.Init(ir) + + tr := func() map[string]map[string]relfilter.ColumnRuleOpts { + tables := make(map[string]map[string]relfilter.ColumnRuleOpts) + for t, cs := range conf.Filters { + columns := make(map[string]relfilter.ColumnRuleOpts) + for c, f := range cs.Columns { + columns[c] = relfilter.ColumnRuleOpts{ + Type: misc.ValueType(f.Type), + Value: f.Value, + Unique: f.Unique, } - return cc - }(), + } + tables[t] = columns } - } + return tables + }() - c.Rules.Defaults = relfilter.TableRules{ - Columns: func() map[string]relfilter.ColumnRule { - cc := make(map[string]relfilter.ColumnRule) - for c, cf := range conf.Security.Defaults.Columns { - cc[c] = relfilter.ColumnRule{ - Type: misc.ValueTypeFromString(cf.Type), - Value: cf.Value, - Unique: cf.Unique, - } + dr := func() map[string]relfilter.ColumnRuleOpts { + cc := make(map[string]relfilter.ColumnRuleOpts) + for c, cf := range conf.Security.Defaults.Columns { + cc[c] = relfilter.ColumnRuleOpts{ + Type: misc.ValueTypeFromString(cf.Type), + Value: cf.Value, + Unique: cf.Unique, } - return cc - }(), - } + } + return cc + }() - c.Rules.ExceptionColumns = func() map[string]any { - v := make(map[string]any) - for _, e := range conf.Security.Exceptions.Columns { - v[e] = nil + trc := func() []relfilter.TypeRuleOpts { + cc := []relfilter.TypeRuleOpts{} + for _, t := range conf.Security.Defaults.Types { + cc = append( + cc, + relfilter.TypeRuleOpts{ + Selector: t.Regex, + Rule: relfilter.ColumnRuleOpts{ + Type: misc.ValueTypeFromString(t.Rule.Type), + Value: t.Rule.Value, + Unique: t.Rule.Unique, + }, + }, + ) } - return v + return cc }() + switch args.DBType { + case DBTypeMySQL: + c.Anonymizer, err = mysql_anonymize.Init( + c.PR, + 
mysql_anonymize.InitOpts{ + Security: mysql_anonymize.SecurityOpts{ + TablesPolicy: misc.SecurityPolicyTablesType(conf.Security.Policy.Tables), + ColumnsPolicy: misc.SecurityPolicyColumnsTypeFromString(conf.Security.Policy.Columns), + TableExceptions: conf.Security.Exceptions.Tables, + }, + Rules: mysql_anonymize.RulesOpts{ + TableRules: tr, + DefaultRules: dr, + ExceptionColumns: conf.Security.Exceptions.Columns, + TypeRuleCustom: trc, + }, + }, + ) + if err != nil { + c.Log.WithFields(logrus.Fields{ + "details": err, + }).Errorf("ctx init") + return nil, err + } + case DBTypePgSQL: + c.Anonymizer, err = pgsql_anonymize.Init( + c.PR, + pgsql_anonymize.InitOpts{ + Security: pgsql_anonymize.SecurityOpts{ + TablesPolicy: misc.SecurityPolicyTablesType(conf.Security.Policy.Tables), + ColumnsPolicy: misc.SecurityPolicyColumnsTypeFromString(conf.Security.Policy.Columns), + TableExceptions: conf.Security.Exceptions.Tables, + }, + Rules: pgsql_anonymize.RulesOpts{ + TableRules: tr, + DefaultRules: dr, + ExceptionColumns: conf.Security.Exceptions.Columns, + TypeRuleCustom: trc, + }, + }, + ) + if err != nil { + c.Log.WithFields(logrus.Fields{ + "details": err, + }).Errorf("ctx init") + return nil, err + } + } + // Progress settings c.Progress.Humanize = conf.Progress.Humanize @@ -195,17 +245,6 @@ func AppCtxInit() (any, error) { return nil, err } - c.Security = SecurityCtx{ - TablePolicy: misc.SecurityPolicyTablesTypeFromString(conf.Security.Policy.Tables), - TableExceptions: func() map[string]any { - v := make(map[string]any) - for _, e := range conf.Security.Exceptions.Tables { - v[e] = nil - } - return v - }(), - } - return c, nil } diff --git a/interfaces/anonymizer.go b/interfaces/anonymizer.go new file mode 100644 index 0000000..2f460aa --- /dev/null +++ b/interfaces/anonymizer.go @@ -0,0 +1,10 @@ +package interfaces + +import ( + "context" + "io" +) + +type Anonymizer interface { + Run(context.Context, io.Writer) error +} diff --git a/modules/anonymizers/mysql/dh.go 
b/modules/anonymizers/mysql/dh.go index ff747f9..9038d15 100644 --- a/modules/anonymizers/mysql/dh.go +++ b/modules/anonymizers/mysql/dh.go @@ -6,7 +6,6 @@ import ( "strings" "github.com/nixys/nxs-data-anonymizer/misc" - "github.com/nixys/nxs-data-anonymizer/modules/filters/relfilter" ) func dhSecurityInsertInto(usrCtx any, deferred, token []byte) ([]byte, error) { @@ -50,41 +49,44 @@ func dhCreateTableName(usrCtx any, deferred, token []byte) ([]byte, error) { func dhCreateTableFieldName(usrCtx any, deferred, token []byte) ([]byte, error) { uctx := usrCtx.(*userCtx) - uctx.column.name = string(deferred) + uctx.columnName = string(deferred) return append(deferred, token...), nil } -func dhCreateTableColumnTypeAdd(usrCtx any, deferred, token []byte) ([]byte, error) { +func dhCreateTableColumnAdd(usrCtx any, deferred, token []byte) ([]byte, error) { uctx := usrCtx.(*userCtx) - for k, v := range typeKeys { - if k == "generated" { - if k == string(token) || strings.ToUpper(k) == string(token) { - uctx.column.isSkip = true - break - } - } else { - if k == string(token) || strings.ToUpper(k) == string(token) { - uctx.column.columnType = v - break - } - } - } + traw := strings.TrimSpace(string(deferred)) + trawUpper := strings.ToUpper(traw) - return append(deferred, token...), nil -} + if strings.Contains(trawUpper, " GENERATED ") == false { -func dhCreateTableColumnAdd(usrCtx any, deferred, token []byte) ([]byte, error) { + i := strings.IndexAny(strings.TrimSpace(trawUpper), " (,") + if i != -1 { - uctx := usrCtx.(*userCtx) + ct := columnTypeNone + for k, v := range typeKeys { + if trawUpper[0:i] == k { + ct = v + } + } - if uctx.column.isSkip == false { - uctx.filter.ColumnAdd(uctx.column.name, uctx.column.columnType) + t, b := uctx.tables[uctx.filter.TableNameGet()] + if b { + t[uctx.columnName] = ct + } else { + t = make(map[string]columnType) + t[uctx.columnName] = ct + } + uctx.tables[uctx.filter.TableNameGet()] = t + + uctx.filter.ColumnAdd(uctx.columnName, traw) + 
} } - uctx.column = userColumnCtx{} + uctx.columnName = "" return append(deferred, token...), nil } @@ -167,7 +169,7 @@ func dhCreateTableValuesEnd(usrCtx any, deferred, token []byte) ([]byte, error) return []byte{}, err } - return rowDataGen(uctx.filter), nil + return rowDataGen(uctx), nil } func dhCreateTableValuesStringEnd(usrCtx any, deferred, token []byte) ([]byte, error) { @@ -183,14 +185,14 @@ func dhCreateTableValuesStringEnd(usrCtx any, deferred, token []byte) ([]byte, e return []byte{}, err } - return rowDataGen(uctx.filter), nil + return rowDataGen(uctx), nil } -func rowDataGen(filter *relfilter.Filter) []byte { +func rowDataGen(uctx *userCtx) []byte { var out string - row := filter.ValuePop() + row := uctx.filter.ValuePop() for i, v := range row.Values { @@ -201,7 +203,7 @@ func rowDataGen(filter *relfilter.Filter) []byte { if v.V == nil { out += "NULL" } else { - switch filter.ColumnTypeGet(i) { + switch uctx.tables[uctx.filter.TableNameGet()][uctx.filter.ColumnGetName(i)] { case columnTypeString: out += fmt.Sprintf("'%s'", v.V) case columnTypeBinary: @@ -221,7 +223,7 @@ func rowDataGen(filter *relfilter.Filter) []byte { func securityPolicyCheck(uctx *userCtx, tname string) bool { // Continue if security policy is `skip` - if uctx.security.tablePolicy != misc.SecurityPolicyTablesSkip { + if uctx.security.tablesPolicy != misc.SecurityPolicyTablesSkip { return true } diff --git a/modules/anonymizers/mysql/mysql.go b/modules/anonymizers/mysql/mysql.go index e29c2dc..d9ee9a5 100644 --- a/modules/anonymizers/mysql/mysql.go +++ b/modules/anonymizers/mysql/mysql.go @@ -2,8 +2,8 @@ package mysql_anonymize import ( "context" + "fmt" "io" - "strings" "github.com/nixys/nxs-data-anonymizer/misc" "github.com/nixys/nxs-data-anonymizer/modules/filters/relfilter" @@ -11,124 +11,158 @@ import ( fsm "github.com/nixys/nxs-go-fsm" ) -type InitSettings struct { - Security SecuritySettings - Rules relfilter.Rules +type MySQL struct { + uctx *userCtx + sourceReader io.Reader 
} -type SecuritySettings struct { - TablePolicy misc.SecurityPolicyTablesType - TableExceptions map[string]any +type InitOpts struct { + Security SecurityOpts + Rules RulesOpts } -type userCtx struct { - filter *relfilter.Filter - column userColumnCtx +type RulesOpts struct { + TableRules map[string]map[string]relfilter.ColumnRuleOpts + DefaultRules map[string]relfilter.ColumnRuleOpts + ExceptionColumns []string + TypeRuleCustom []relfilter.TypeRuleOpts +} - security securityCtx +type SecurityOpts struct { + TablesPolicy misc.SecurityPolicyTablesType + ColumnsPolicy misc.SecurityPolicyColumnsType + TableExceptions []string } -type userColumnCtx struct { - name string - columnType relfilter.ColumnType - isSkip bool +type userCtx struct { + filter *relfilter.Filter + columnName string + security securityCtx + tables map[string]map[string]columnType } type securityCtx struct { tmpBuf []byte isSkip bool - tablePolicy misc.SecurityPolicyTablesType + tablesPolicy misc.SecurityPolicyTablesType tableExceptions map[string]any } +type columnType string + const ( - columnTypeString relfilter.ColumnType = "string" - columnTypeNum relfilter.ColumnType = "numeric" - columnTypeBinary relfilter.ColumnType = "binary" + columnTypeNone columnType = "none" + columnTypeString columnType = "string" + columnTypeNum columnType = "numeric" + columnTypeBinary columnType = "binary" ) -var typeKeys = map[string]relfilter.ColumnType{ +func (c columnType) String() string { + return string(c) +} - // Special - "generated": relfilter.ColumnTypeNone, +var typeKeys = map[string]columnType{ // Strings - "char": columnTypeString, - "varchar": columnTypeString, - "tinytext": columnTypeString, - "text": columnTypeString, - "mediumtext": columnTypeString, - "longtext": columnTypeString, - "enum": columnTypeString, - "set": columnTypeString, - "date": columnTypeString, - "datetime": columnTypeString, - "timestamp": columnTypeString, - "time": columnTypeString, - "year": columnTypeString, - "json": 
columnTypeString, + "CHAR": columnTypeString, + "VARCHAR": columnTypeString, + "TINYTEXT": columnTypeString, + "TEXT": columnTypeString, + "MEDIUMTEXT": columnTypeString, + "LONGTEXT": columnTypeString, + "ENUM": columnTypeString, + "SET": columnTypeString, + "DATE": columnTypeString, + "DATETIME": columnTypeString, + "TIMESTAMP": columnTypeString, + "TIME": columnTypeString, + "YEAR": columnTypeString, + "JSON": columnTypeString, // Numeric - "bit": columnTypeNum, - "bool": columnTypeNum, - "boolean": columnTypeNum, - "tinyint": columnTypeNum, - "smallint": columnTypeNum, - "mediumint": columnTypeNum, - "int": columnTypeNum, - "integer": columnTypeNum, - "bigint": columnTypeNum, - "float": columnTypeNum, - "double": columnTypeNum, - "double precision": columnTypeNum, - "decimal": columnTypeNum, - "dec": columnTypeNum, + "BIT": columnTypeNum, + "BOOL": columnTypeNum, + "BOOLEAN": columnTypeNum, + "TINYINT": columnTypeNum, + "SMALLINT": columnTypeNum, + "MEDIUMINT": columnTypeNum, + "INT": columnTypeNum, + "INTEGER": columnTypeNum, + "BIGINT": columnTypeNum, + "FLOAT": columnTypeNum, + "DOUBLE": columnTypeNum, + "DOUBLE precision": columnTypeNum, + "DECIMAL": columnTypeNum, + "DEC": columnTypeNum, // Binary - "binary": columnTypeBinary, - "varbinary": columnTypeBinary, - "tinyblob": columnTypeBinary, - "blob": columnTypeBinary, - "mediumblob": columnTypeBinary, - "longblob": columnTypeBinary, + "BINARY": columnTypeBinary, + "VARBINARY": columnTypeBinary, + "TINYBLOB": columnTypeBinary, + "BLOB": columnTypeBinary, + "MEDIUMBLOB": columnTypeBinary, + "LONGBLOB": columnTypeBinary, } -var RandomizeTypesDefault = map[relfilter.ColumnType]relfilter.ColumnRule{ - columnTypeBinary: { - Type: misc.ValueTypeTemplate, - Value: "cmFuZG9taXplZCBiaW5hcnkgZGF0YQo=", - Unique: false, - }, - columnTypeNum: { - Type: misc.ValueTypeTemplate, - Value: "0", - Unique: false, - }, - columnTypeString: { - Type: misc.ValueTypeTemplate, - Value: "randomized string data", - Unique: false, - 
}, -} +func userCtxInit(s InitOpts) (*userCtx, error) { + + trc := []relfilter.TypeRuleOpts{} + trd := []relfilter.TypeRuleOpts{} + if s.Security.ColumnsPolicy == misc.SecurityPolicyColumnsRandomize { + trc = s.Rules.TypeRuleCustom + trd = typeRuleDefault + } + + f, err := relfilter.Init( + relfilter.InitOpts{ + TableRules: s.Rules.TableRules, + DefaultRules: s.Rules.DefaultRules, + ExceptionColumns: s.Rules.ExceptionColumns, + TypeRuleCustom: trc, + TypeRuleDefault: trd, + }, + ) + if err != nil { + return nil, fmt.Errorf("user ctx init: %w", err) + } -func userCtxInit(s InitSettings) *userCtx { return &userCtx{ - filter: relfilter.Init(s.Rules), + filter: f, security: securityCtx{ - tablePolicy: s.Security.TablePolicy, - tableExceptions: s.Security.TableExceptions, + tablesPolicy: s.Security.TablesPolicy, + tableExceptions: func() map[string]any { + excs := make(map[string]any) + for _, e := range s.Security.TableExceptions { + excs[e] = nil + } + return excs + }(), }, + tables: make(map[string]map[string]columnType), + }, nil +} + +func Init(r io.Reader, s InitOpts) (*MySQL, error) { + + uctx, err := userCtxInit(s) + if err != nil { + return nil, fmt.Errorf("mysql anonymizer init: %w", err) } + + return &MySQL{ + uctx: uctx, + sourceReader: r, + }, nil } -func Init(ctx context.Context, r io.Reader, s InitSettings) io.Reader { +func (m *MySQL) Run(ctx context.Context, w io.Writer) error { - return fsm.Init( - r, + ar := fsm.Init( + m.sourceReader, fsm.Description{ Ctx: ctx, - UserCtx: userCtxInit(s), + UserCtx: m.uctx, InitState: stateCreateSearch, States: map[fsm.StateName]fsm.State{ @@ -302,34 +336,8 @@ func Init(ctx context.Context, r io.Reader, s InitSettings) io.Reader { }, }, stateFieldsDescriptionNameTail: { - NextStates: func() []fsm.NextState { - - var nss []fsm.NextState - - for t := range typeKeys { - for i := 0; i < 2; i++ { - - s := t - if i == 1 { - s = strings.ToUpper(t) - } - - nss = append(nss, fsm.NextState{ - Name: 
stateFieldsDescriptionNameTail, - Switch: fsm.Switch{ - Trigger: []byte(s), - Delimiters: fsm.Delimiters{ - L: []byte{' '}, - R: []byte{' ', '(', ',', '\n'}, - }, - }, - DataHandler: dhCreateTableColumnTypeAdd, - }) - } - } - - // Additional states - nss = append(nss, fsm.NextState{ + NextStates: []fsm.NextState{ + { Name: stateFieldsDescriptionBlock, Switch: fsm.Switch{ Trigger: []byte(","), @@ -338,8 +346,8 @@ func Init(ctx context.Context, r io.Reader, s InitSettings) io.Reader { }, }, DataHandler: dhCreateTableColumnAdd, - }) - nss = append(nss, fsm.NextState{ + }, + { Name: statefFieldsDescriptionBlockEnd, Switch: fsm.Switch{ Trigger: []byte(")"), @@ -348,10 +356,8 @@ func Init(ctx context.Context, r io.Reader, s InitSettings) io.Reader { }, }, DataHandler: dhCreateTableColumnAdd, - }) - - return nss - }(), + }, + }, }, statefFieldsDescriptionBlockEnd: { NextStates: []fsm.NextState{ @@ -518,4 +524,11 @@ func Init(ctx context.Context, r io.Reader, s InitSettings) io.Reader { }, }, ) + + _, err := io.Copy(w, ar) + if err != nil { + return fmt.Errorf("mysql anonymizer run: %w", err) + } + + return nil } diff --git a/modules/anonymizers/mysql/security_types.go b/modules/anonymizers/mysql/security_types.go new file mode 100644 index 0000000..18fea11 --- /dev/null +++ b/modules/anonymizers/mysql/security_types.go @@ -0,0 +1,283 @@ +package mysql_anonymize + +import ( + "github.com/nixys/nxs-data-anonymizer/misc" + "github.com/nixys/nxs-data-anonymizer/modules/filters/relfilter" +) + +const ( + securityTypeString = "randomized string data" + securityTypeNum = "0" + securityTypeBinary = "cmFuZG9taXplZCBiaW5hcnkgZGF0YQo=" +) + +var typeRuleDefault = []relfilter.TypeRuleOpts{ + + // String + { + Selector: "(?i)^char", + Rule: relfilter.ColumnRuleOpts{ + Type: misc.ValueTypeTemplate, + Value: securityTypeString, + Unique: false, + }, + }, + { + Selector: "(?i)^varchar", + Rule: relfilter.ColumnRuleOpts{ + Type: misc.ValueTypeTemplate, + Value: securityTypeString, + 
Unique: false, + }, + }, + { + Selector: "(?i)^tinytext", + Rule: relfilter.ColumnRuleOpts{ + Type: misc.ValueTypeTemplate, + Value: securityTypeString, + Unique: false, + }, + }, + { + Selector: "(?i)^text", + Rule: relfilter.ColumnRuleOpts{ + Type: misc.ValueTypeTemplate, + Value: securityTypeString, + Unique: false, + }, + }, + { + Selector: "(?i)^mediumtext", + Rule: relfilter.ColumnRuleOpts{ + Type: misc.ValueTypeTemplate, + Value: securityTypeString, + Unique: false, + }, + }, + { + Selector: "(?i)^longtext", + Rule: relfilter.ColumnRuleOpts{ + Type: misc.ValueTypeTemplate, + Value: securityTypeString, + Unique: false, + }, + }, + { + Selector: "(?i)^enum", + Rule: relfilter.ColumnRuleOpts{ + Type: misc.ValueTypeTemplate, + Value: securityTypeString, + Unique: false, + }, + }, + { + Selector: "(?i)^set", + Rule: relfilter.ColumnRuleOpts{ + Type: misc.ValueTypeTemplate, + Value: securityTypeString, + Unique: false, + }, + }, + { + Selector: "(?i)^date", + Rule: relfilter.ColumnRuleOpts{ + Type: misc.ValueTypeTemplate, + Value: securityTypeString, + Unique: false, + }, + }, + { + Selector: "(?i)^datetime", + Rule: relfilter.ColumnRuleOpts{ + Type: misc.ValueTypeTemplate, + Value: securityTypeString, + Unique: false, + }, + }, + { + Selector: "(?i)^timestamp", + Rule: relfilter.ColumnRuleOpts{ + Type: misc.ValueTypeTemplate, + Value: securityTypeString, + Unique: false, + }, + }, + { + Selector: "(?i)^time", + Rule: relfilter.ColumnRuleOpts{ + Type: misc.ValueTypeTemplate, + Value: securityTypeString, + Unique: false, + }, + }, + { + Selector: "(?i)^year", + Rule: relfilter.ColumnRuleOpts{ + Type: misc.ValueTypeTemplate, + Value: securityTypeString, + Unique: false, + }, + }, + { + Selector: "(?i)^json", + Rule: relfilter.ColumnRuleOpts{ + Type: misc.ValueTypeTemplate, + Value: securityTypeString, + Unique: false, + }, + }, + + // Numeric + { + Selector: "(?i)^bit", + Rule: relfilter.ColumnRuleOpts{ + Type: misc.ValueTypeTemplate, + Value: securityTypeNum, + 
Unique: false, + }, + }, + { + Selector: "(?i)^bool", + Rule: relfilter.ColumnRuleOpts{ + Type: misc.ValueTypeTemplate, + Value: securityTypeNum, + Unique: false, + }, + }, + { + Selector: "(?i)^boolean", + Rule: relfilter.ColumnRuleOpts{ + Type: misc.ValueTypeTemplate, + Value: securityTypeNum, + Unique: false, + }, + }, + { + Selector: "(?i)^tinyint", + Rule: relfilter.ColumnRuleOpts{ + Type: misc.ValueTypeTemplate, + Value: securityTypeNum, + Unique: false, + }, + }, + { + Selector: "(?i)^smallint", + Rule: relfilter.ColumnRuleOpts{ + Type: misc.ValueTypeTemplate, + Value: securityTypeNum, + Unique: false, + }, + }, + { + Selector: "(?i)^mediumint", + Rule: relfilter.ColumnRuleOpts{ + Type: misc.ValueTypeTemplate, + Value: securityTypeNum, + Unique: false, + }, + }, + { + Selector: "(?i)^int", + Rule: relfilter.ColumnRuleOpts{ + Type: misc.ValueTypeTemplate, + Value: securityTypeNum, + Unique: false, + }, + }, + { + Selector: "(?i)^integer", + Rule: relfilter.ColumnRuleOpts{ + Type: misc.ValueTypeTemplate, + Value: securityTypeNum, + Unique: false, + }, + }, + { + Selector: "(?i)^bigint", + Rule: relfilter.ColumnRuleOpts{ + Type: misc.ValueTypeTemplate, + Value: securityTypeNum, + Unique: false, + }, + }, + { + Selector: "(?i)^float", + Rule: relfilter.ColumnRuleOpts{ + Type: misc.ValueTypeTemplate, + Value: securityTypeNum, + Unique: false, + }, + }, + { + Selector: "(?i)^double", + Rule: relfilter.ColumnRuleOpts{ + Type: misc.ValueTypeTemplate, + Value: securityTypeNum, + Unique: false, + }, + }, + { + Selector: "(?i)^decimal", + Rule: relfilter.ColumnRuleOpts{ + Type: misc.ValueTypeTemplate, + Value: securityTypeNum, + Unique: false, + }, + }, + { + Selector: "(?i)^dec", + Rule: relfilter.ColumnRuleOpts{ + Type: misc.ValueTypeTemplate, + Value: securityTypeNum, + Unique: false, + }, + }, + + // Binary + { + Selector: "(?i)^binary", + Rule: relfilter.ColumnRuleOpts{ + Type: misc.ValueTypeTemplate, + Value: securityTypeBinary, + Unique: false, + }, + }, + { + 
Selector: "(?i)^varbinary", + Rule: relfilter.ColumnRuleOpts{ + Type: misc.ValueTypeTemplate, + Value: securityTypeBinary, + Unique: false, + }, + }, { + Selector: "(?i)^tinyblob", + Rule: relfilter.ColumnRuleOpts{ + Type: misc.ValueTypeTemplate, + Value: securityTypeBinary, + Unique: false, + }, + }, + { + Selector: "(?i)^blob", + Rule: relfilter.ColumnRuleOpts{ + Type: misc.ValueTypeTemplate, + Value: securityTypeBinary, + Unique: false, + }, + }, + { + Selector: "(?i)^mediumblob", + Rule: relfilter.ColumnRuleOpts{ + Type: misc.ValueTypeTemplate, + Value: securityTypeBinary, + Unique: false, + }, + }, { + Selector: "(?i)^longblob", + Rule: relfilter.ColumnRuleOpts{ + Type: misc.ValueTypeTemplate, + Value: securityTypeBinary, + Unique: false, + }, + }, +} diff --git a/modules/anonymizers/pgsql/dh.go b/modules/anonymizers/pgsql/dh.go index 87640cf..f0be4c3 100644 --- a/modules/anonymizers/pgsql/dh.go +++ b/modules/anonymizers/pgsql/dh.go @@ -3,6 +3,7 @@ package pgsql_anonymize import ( "bytes" "fmt" + "strings" "github.com/nixys/nxs-data-anonymizer/misc" "github.com/nixys/nxs-data-anonymizer/modules/filters/relfilter" @@ -42,23 +43,23 @@ func dhCreateTableDesc(usrCtx any, deferred, token []byte) ([]byte, error) { uctx := usrCtx.(*userCtx) - clmns := make(map[string]relfilter.ColumnType) + clmns := make(map[string]string) ss := bytes.Split(deferred, []byte{'\n'}) - for _, s := range ss { + for _, v := range ss { - s = bytes.TrimSuffix(bytes.TrimSpace(s), []byte{','}) + s := strings.TrimSuffix(strings.TrimSpace(string(v)), ",") if len(s) > 0 { - u := bytes.SplitN(s, []byte{' '}, 3) + u := strings.SplitN(s, " ", 2) // If column type does not specified within the dump if len(u) < 2 { - clmns[string(u[0])] = relfilter.ColumnTypeNone + clmns[u[0]] = "" } else { - clmns[string(u[0])] = columnType(string(u[1])) + clmns[u[0]] = u[1] } } } @@ -105,12 +106,10 @@ func dhFieldName(usrCtx any, deferred, token []byte) ([]byte, error) { return []byte{}, nil } - t, b := 
uctx.tables[uctx.filter.TableNameGet()][string(fname)] - if b == false { - t = relfilter.ColumnTypeNone - } - - uctx.filter.ColumnAdd(string(fname), t) + uctx.filter.ColumnAdd( + string(fname), + uctx.tables[uctx.filter.TableNameGet()][string(fname)], + ) return append(deferred, token...), nil } @@ -182,7 +181,7 @@ func rowDataGen(filter *relfilter.Filter) []byte { func securityPolicyCheck(uctx *userCtx, tname string) bool { // Continue if security policy is `skip` - if uctx.security.tablePolicy != misc.SecurityPolicyTablesSkip { + if uctx.security.tablesPolicy != misc.SecurityPolicyTablesSkip { return true } diff --git a/modules/anonymizers/pgsql/pgsql.go b/modules/anonymizers/pgsql/pgsql.go index 89dd7af..88da164 100644 --- a/modules/anonymizers/pgsql/pgsql.go +++ b/modules/anonymizers/pgsql/pgsql.go @@ -2,6 +2,7 @@ package pgsql_anonymize import ( "context" + "fmt" "io" "github.com/nixys/nxs-data-anonymizer/misc" @@ -10,97 +11,102 @@ import ( fsm "github.com/nixys/nxs-go-fsm" ) +type PgSQL struct { + uctx *userCtx + sourceReader io.Reader +} + type InitOpts struct { Security SecurityOpts - Rules relfilter.Rules + Rules RulesOpts +} + +type RulesOpts struct { + TableRules map[string]map[string]relfilter.ColumnRuleOpts + DefaultRules map[string]relfilter.ColumnRuleOpts + ExceptionColumns []string + TypeRuleCustom []relfilter.TypeRuleOpts } type SecurityOpts struct { - TablePolicy misc.SecurityPolicyTablesType - TableExceptions map[string]any + TablesPolicy misc.SecurityPolicyTablesType + ColumnsPolicy misc.SecurityPolicyColumnsType + TableExceptions []string } type userCtx struct { - filter *relfilter.Filter - + filter *relfilter.Filter + tn *string security securityCtx - - tn *string - tables map[string]map[string]relfilter.ColumnType + tables map[string]map[string]string } type securityCtx struct { tmpBuf []byte isSkip bool - tablePolicy misc.SecurityPolicyTablesType + tablesPolicy misc.SecurityPolicyTablesType tableExceptions map[string]any } -const ( - 
columnTypeString relfilter.ColumnType = "string" - columnTypeInt relfilter.ColumnType = "integer" - columnTypeFloat relfilter.ColumnType = "float" -) +func userCtxInit(s InitOpts) (*userCtx, error) { -var typeKeys = map[string]relfilter.ColumnType{ - - // Integer - "smallint": columnTypeInt, - "integer": columnTypeInt, - "bigint": columnTypeInt, - "smallserial": columnTypeInt, - "serial": columnTypeInt, - "bigserial": columnTypeInt, - - // Float - "decimal": columnTypeFloat, - "numeric": columnTypeFloat, - "real": columnTypeFloat, - "double": columnTypeFloat, - - // Strings - "character": columnTypeString, - "bpchar": columnTypeString, - "text": columnTypeString, -} + trc := []relfilter.TypeRuleOpts{} + trd := []relfilter.TypeRuleOpts{} + if s.Security.ColumnsPolicy == misc.SecurityPolicyColumnsRandomize { + trc = s.Rules.TypeRuleCustom + trd = typeRuleDefault + } -var RandomizeTypesDefault = map[relfilter.ColumnType]relfilter.ColumnRule{ - columnTypeInt: { - Type: misc.ValueTypeTemplate, - Value: "0", - Unique: false, - }, - columnTypeFloat: { - Type: misc.ValueTypeTemplate, - Value: "0.0", - Unique: false, - }, - columnTypeString: { - Type: misc.ValueTypeTemplate, - Value: "randomized string data", - Unique: false, - }, -} + f, err := relfilter.Init( + relfilter.InitOpts{ + TableRules: s.Rules.TableRules, + DefaultRules: s.Rules.DefaultRules, + ExceptionColumns: s.Rules.ExceptionColumns, + TypeRuleCustom: trc, + TypeRuleDefault: trd, + }, + ) + if err != nil { + return nil, fmt.Errorf("user ctx init: %w", err) + } -func userCtxInit(s InitOpts) *userCtx { return &userCtx{ - filter: relfilter.Init(s.Rules), - tables: make(map[string]map[string]relfilter.ColumnType), + filter: f, security: securityCtx{ - tablePolicy: s.Security.TablePolicy, - tableExceptions: s.Security.TableExceptions, + tablesPolicy: s.Security.TablesPolicy, + tableExceptions: func() map[string]any { + excs := make(map[string]any) + for _, e := range s.Security.TableExceptions { + excs[e] = nil + 
} + return excs + }(), }, + tables: make(map[string]map[string]string), + }, nil +} + +func Init(r io.Reader, s InitOpts) (*PgSQL, error) { + + uctx, err := userCtxInit(s) + if err != nil { + return nil, fmt.Errorf("pgsql anonymizer init: %w", err) } + + return &PgSQL{ + uctx: uctx, + sourceReader: r, + }, nil } -func Init(ctx context.Context, r io.Reader, s InitOpts) io.Reader { +func (p *PgSQL) Run(ctx context.Context, w io.Writer) error { - return fsm.Init( - r, + ar := fsm.Init( + p.sourceReader, fsm.Description{ Ctx: ctx, - UserCtx: userCtxInit(s), + UserCtx: p.uctx, InitState: stateInit, States: map[fsm.StateName]fsm.State{ @@ -234,12 +240,11 @@ func Init(ctx context.Context, r io.Reader, s InitOpts) io.Reader { }, }, ) -} -func columnType(key string) relfilter.ColumnType { - t, b := typeKeys[key] - if b == false { - return relfilter.ColumnTypeNone + _, err := io.Copy(w, ar) + if err != nil { + return fmt.Errorf("pgsql anonymizer run: %w", err) } - return t + + return nil } diff --git a/modules/anonymizers/pgsql/security_types.go b/modules/anonymizers/pgsql/security_types.go new file mode 100644 index 0000000..1bc9667 --- /dev/null +++ b/modules/anonymizers/pgsql/security_types.go @@ -0,0 +1,125 @@ +package pgsql_anonymize + +import ( + "github.com/nixys/nxs-data-anonymizer/misc" + "github.com/nixys/nxs-data-anonymizer/modules/filters/relfilter" +) + +const ( + securityTypeInt = "0" + securityTypeFloat = "0.0" + securityTypeString = "randomized string data" +) + +var typeRuleDefault = []relfilter.TypeRuleOpts{ + + // Integer + { + Selector: "(?i)^smallint", + Rule: relfilter.ColumnRuleOpts{ + Type: misc.ValueTypeTemplate, + Value: securityTypeInt, + Unique: false, + }, + }, + { + Selector: "(?i)^integer", + Rule: relfilter.ColumnRuleOpts{ + Type: misc.ValueTypeTemplate, + Value: securityTypeInt, + Unique: false, + }, + }, + { + Selector: "(?i)^bigint", + Rule: relfilter.ColumnRuleOpts{ + Type: misc.ValueTypeTemplate, + Value: securityTypeInt, + Unique: false, 
+ }, + }, + { + Selector: "(?i)^smallserial", + Rule: relfilter.ColumnRuleOpts{ + Type: misc.ValueTypeTemplate, + Value: securityTypeInt, + Unique: false, + }, + }, + { + Selector: "(?i)^serial", + Rule: relfilter.ColumnRuleOpts{ + Type: misc.ValueTypeTemplate, + Value: securityTypeInt, + Unique: false, + }, + }, + { + Selector: "(?i)^bigserial", + Rule: relfilter.ColumnRuleOpts{ + Type: misc.ValueTypeTemplate, + Value: securityTypeInt, + Unique: false, + }, + }, + + // Float + { + Selector: "(?i)^decimal", + Rule: relfilter.ColumnRuleOpts{ + Type: misc.ValueTypeTemplate, + Value: securityTypeFloat, + Unique: false, + }, + }, + { + Selector: "(?i)^numeric", + Rule: relfilter.ColumnRuleOpts{ + Type: misc.ValueTypeTemplate, + Value: securityTypeFloat, + Unique: false, + }, + }, + { + Selector: "(?i)^real", + Rule: relfilter.ColumnRuleOpts{ + Type: misc.ValueTypeTemplate, + Value: securityTypeFloat, + Unique: false, + }, + }, + { + Selector: "(?i)^double", + Rule: relfilter.ColumnRuleOpts{ + Type: misc.ValueTypeTemplate, + Value: securityTypeFloat, + Unique: false, + }, + }, + + // Strings + { + Selector: "(?i)^character", + Rule: relfilter.ColumnRuleOpts{ + Type: misc.ValueTypeTemplate, + Value: securityTypeString, + Unique: false, + }, + }, + { + Selector: "(?i)^bpchar", + Rule: relfilter.ColumnRuleOpts{ + Type: misc.ValueTypeTemplate, + Value: securityTypeString, + Unique: false, + }, + }, + { + Selector: "(?i)^text", + Rule: relfilter.ColumnRuleOpts{ + Type: misc.ValueTypeTemplate, + Value: securityTypeString, + Unique: false, + }, + }, +} diff --git a/modules/filters/relfilter/column.go b/modules/filters/relfilter/column.go index 887013d..5884c53 100644 --- a/modules/filters/relfilter/column.go +++ b/modules/filters/relfilter/column.go @@ -6,18 +6,8 @@ type columns struct { } type column struct { - n string - t ColumnType -} - -type ColumnType string - -const ( - ColumnTypeNone ColumnType = "none" -) - -func (c ColumnType) String() string { - return string(c) + n 
string + rawType string } func columnsInit() columns { @@ -27,22 +17,22 @@ func columnsInit() columns { } } -func (c *columns) add(name string, t ColumnType) { +func (c *columns) add(name string, rt string) { v := column{ - n: name, - t: t, + n: name, + rawType: rt, } c.cc = append(c.cc, &v) c.m[name] = &v } -func (c *columns) typeGetByIndex(index int) ColumnType { +func (c *columns) getNameByIndex(index int) string { if index >= len(c.cc) { - return ColumnTypeNone + return "" } - return c.cc[index].t + return c.cc[index].n } func (c *columns) delByName(name string) { diff --git a/modules/filters/relfilter/filter.go b/modules/filters/relfilter/filter.go index 8278393..9f0e7c1 100644 --- a/modules/filters/relfilter/filter.go +++ b/modules/filters/relfilter/filter.go @@ -4,22 +4,26 @@ import ( "bytes" "fmt" "os/exec" + "regexp" "github.com/nixys/nxs-data-anonymizer/misc" ) -type Rules struct { - Tables map[string]TableRules - ExceptionColumns map[string]any - Defaults TableRules - RandomizeTypes map[ColumnType]ColumnRule +type InitOpts struct { + TableRules map[string]map[string]ColumnRuleOpts + DefaultRules map[string]ColumnRuleOpts + ExceptionColumns []string + + TypeRuleCustom []TypeRuleOpts + TypeRuleDefault []TypeRuleOpts } -type TableRules struct { - Columns map[string]ColumnRule +type TypeRuleOpts struct { + Selector string + Rule ColumnRuleOpts } -type ColumnRule struct { +type ColumnRuleOpts struct { Type misc.ValueType Value string Unique bool @@ -28,7 +32,7 @@ type ColumnRule struct { type Filter struct { // Rules for filter a table values - rules Rules + rules rules // Temp table data for filtering tableData tableData @@ -38,6 +42,20 @@ type Row struct { Values []rowValue } +type rules struct { + tableRules map[string]map[string]ColumnRuleOpts + defaultRules map[string]ColumnRuleOpts + exceptionColumns map[string]any + + typeRuleCustom []typeRule + typeRuleDefault []typeRule +} + +type typeRule struct { + Rgx *regexp.Regexp + Rule ColumnRuleOpts +} + type 
tableData struct { name string columns columns @@ -57,16 +75,66 @@ const ( envVarCurColumn = "ENVVARCURCOLUMN" ) -type rule struct { +type applyRule struct { c *column i int - cr ColumnRule + cr ColumnRuleOpts } -func Init(rules Rules) *Filter { - return &Filter{ - rules: rules, +func Init(opts InitOpts) (*Filter, error) { + + trc := []typeRule{} + trd := []typeRule{} + + // Make custom type rules + for _, r := range opts.TypeRuleCustom { + + re, err := regexp.Compile(r.Selector) + if err != nil { + return nil, fmt.Errorf("filter init: %w", err) + } + + trc = append( + trc, + typeRule{ + Rgx: re, + Rule: r.Rule, + }, + ) } + + // Make default type rules + for _, r := range opts.TypeRuleDefault { + + re, err := regexp.Compile(r.Selector) + if err != nil { + return nil, fmt.Errorf("filter init: %w", err) + } + + trd = append( + trd, + typeRule{ + Rgx: re, + Rule: r.Rule, + }, + ) + } + + // Make exceptions + excpts := make(map[string]any) + for _, e := range opts.ExceptionColumns { + excpts[e] = nil + } + + return &Filter{ + rules: rules{ + tableRules: opts.TableRules, + defaultRules: opts.DefaultRules, + exceptionColumns: excpts, + typeRuleCustom: trc, + typeRuleDefault: trd, + }, + }, nil } // TableCreate creates new data set for table `name` @@ -84,20 +152,20 @@ func (filter *Filter) TableNameGet() string { } // TableRulesLookup looks up filters for specified table name -func (filter *Filter) TableRulesLookup(name string) *TableRules { - if t, b := filter.rules.Tables[name]; b { - return &t +func (filter *Filter) TableRulesLookup(name string) map[string]ColumnRuleOpts { + if t, b := filter.rules.tableRules[name]; b { + return t } return nil } // ColumnAdd adds new column into current data set -func (filter *Filter) ColumnAdd(name string, t ColumnType) { - filter.tableData.columns.add(name, t) +func (filter *Filter) ColumnAdd(name string, rt string) { + filter.tableData.columns.add(name, rt) } -func (filter *Filter) ColumnTypeGet(index int) ColumnType { - return 
filter.tableData.columns.typeGetByIndex(index) +func (filter *Filter) ColumnGetName(index int) string { + return filter.tableData.columns.getNameByIndex(index) } func (filter *Filter) ValueAdd(b []byte) { @@ -124,7 +192,7 @@ func (filter *Filter) ValuePop() Row { func (filter *Filter) Apply() error { - var rls []rule + var rls []applyRule tname := filter.tableData.name @@ -136,11 +204,11 @@ func (filter *Filter) Apply() error { // Check direct rules for column if tr != nil { - if cr, e := tr.Columns[c.n]; e == true { + if cr, e := tr[c.n]; e == true { rls = append( rls, - rule{ + applyRule{ c: c, i: i, cr: cr, @@ -151,10 +219,10 @@ func (filter *Filter) Apply() error { } // Check default rules for column - if cr, e := filter.rules.Defaults.Columns[c.n]; e == true { + if cr, e := filter.rules.defaultRules[c.n]; e == true { rls = append( rls, - rule{ + applyRule{ c: c, i: i, cr: cr, @@ -163,21 +231,49 @@ func (filter *Filter) Apply() error { continue } - // Check randomize rules for column - if cr, b := filter.rules.RandomizeTypes[c.t]; b { + // Check column is excepted + if _, b := filter.rules.exceptionColumns[c.n]; b { + continue + } - // Check that column excepted - if _, b := filter.rules.ExceptionColumns[c.n]; !b { - rls = append( - rls, - rule{ - c: c, - i: i, - cr: cr, - }, - ) - continue + // Check custom type rule for column + if b := func() bool { + for _, r := range filter.rules.typeRuleCustom { + if r.Rgx.Match([]byte(c.rawType)) { + rls = append( + rls, + applyRule{ + c: c, + i: i, + cr: r.Rule, + }, + ) + return true + } + } + return false + }(); b { + continue + } + + // Check default type rule for column + if b := func() bool { + for _, r := range filter.rules.typeRuleDefault { + if r.Rgx.Match([]byte(c.rawType)) { + rls = append( + rls, + applyRule{ + c: c, + i: i, + cr: r.Rule, + }, + ) + return true + } } + return false + }(); b { + continue } // Other rules if required @@ -191,7 +287,7 @@ func (filter *Filter) Apply() error { return nil } -func 
(filter *Filter) applyRules(tname string, rls []rule) error { +func (filter *Filter) applyRules(tname string, rls []applyRule) error { // If no columns has rules if len(rls) == 0 { @@ -240,7 +336,7 @@ func (filter *Filter) applyRules(tname string, rls []rule) error { return nil } -func (filter *Filter) applyFilter(cn string, cr ColumnRule, td misc.TemplateData, tde []string) ([]byte, error) { +func (filter *Filter) applyFilter(cn string, cr ColumnRuleOpts, td misc.TemplateData, tde []string) ([]byte, error) { for i := 0; i < uniqueAttempts; i++ { diff --git a/routines/anonymizer/anonymizer.go b/routines/anonymizer/anonymizer.go index 8aa85ef..cfa907a 100644 --- a/routines/anonymizer/anonymizer.go +++ b/routines/anonymizer/anonymizer.go @@ -8,26 +8,19 @@ import ( "github.com/docker/go-units" "github.com/nixys/nxs-data-anonymizer/ctx" - "github.com/nixys/nxs-data-anonymizer/misc" + "github.com/nixys/nxs-data-anonymizer/interfaces" "github.com/sirupsen/logrus" appctx "github.com/nixys/nxs-go-appctx/v3" - - mysql_anonymize "github.com/nixys/nxs-data-anonymizer/modules/anonymizers/mysql" - pgsql_anonymize "github.com/nixys/nxs-data-anonymizer/modules/anonymizers/pgsql" - "github.com/nixys/nxs-data-anonymizer/modules/filters/relfilter" - progressreader "github.com/nixys/nxs-data-anonymizer/modules/progress_reader" ) -type anonymizeSettings struct { +type anonymizeOpts struct { c context.Context l *logrus.Logger - pr *progressreader.ProgressReader ch chan error db ctx.DBCtx - rs relfilter.Rules w io.Writer - s ctx.SecurityCtx + a interfaces.Anonymizer } func Runtime(app appctx.App) error { @@ -45,10 +38,7 @@ func Runtime(app appctx.App) error { cx, cf := context.WithCancel(app.SelfCtx()) defer cf() - // Init progress reader - pr := progressreader.Init(cc.Input) - - c := make(chan error, 1) + ch := make(chan error, 1) timer = time.NewTimer(cc.Progress.Rhythm) if cc.Progress.Rhythm == 0 { @@ -56,15 +46,13 @@ func Runtime(app appctx.App) error { } if err := anonymize( - 
anonymizeSettings{ + anonymizeOpts{ c: cx, l: cc.Log, - pr: pr, - ch: c, + ch: ch, db: cc.DB, - rs: cc.Rules, w: cc.Output, - s: cc.Security, + a: cc.Anonymizer, }, ); err != nil { return err @@ -75,17 +63,17 @@ func Runtime(app appctx.App) error { case <-app.SelfCtxDone(): // Log reader progress if necessary - if cc.Progress.Rhythm != 0 && lb != pr.Bytes() { - progressLog(cc.Log, pr.Bytes(), cc.Progress.Humanize) + if cc.Progress.Rhythm != 0 && lb != cc.PR.Bytes() { + progressLog(cc.Log, cc.PR.Bytes(), cc.Progress.Humanize) } cc.Log.Info("anonymizer routine done") return nil - case err := <-c: + case err := <-ch: // Log reader progress if necessary - if cc.Progress.Rhythm != 0 && lb != pr.Bytes() { - progressLog(cc.Log, pr.Bytes(), cc.Progress.Humanize) + if cc.Progress.Rhythm != 0 && lb != cc.PR.Bytes() { + progressLog(cc.Log, cc.PR.Bytes(), cc.Progress.Humanize) } if err != nil { @@ -102,7 +90,7 @@ func Runtime(app appctx.App) error { case <-timer.C: // Save bytes count printed in log last time - lb = pr.Bytes() + lb = cc.PR.Bytes() // Log reader progress progressLog(cc.Log, lb, cc.Progress.Humanize) @@ -112,62 +100,21 @@ func Runtime(app appctx.App) error { } } -func anonymize(st anonymizeSettings) error { - - // Anonymizer reader - var ar io.Reader - - // Init anonymize reader in accordance with specified database type - switch st.db.Type { - case ctx.DBTypeMySQL: +func anonymize(st anonymizeOpts) error { - // Drop database tables if necessary (experimental) - if st.db.Cleanup == true && st.db.MySQL != nil { - if err := st.db.MySQL.DBCleanup(); err != nil { + if st.db.Type == ctx.DBTypeMySQL && st.db.Cleanup == true && st.db.MySQL != nil { + if err := st.db.MySQL.DBCleanup(); err != nil { - st.l.WithFields(logrus.Fields{ - "details": err, - }).Errorf("anonymize: MySQL clean up") + st.l.WithFields(logrus.Fields{ + "details": err, + }).Errorf("anonymize: MySQL clean up") - return err - } + return err } - - ar = mysql_anonymize.Init( - st.c, - st.pr, - 
mysql_anonymize.InitSettings{ - Security: mysql_anonymize.SecuritySettings{ - TablePolicy: st.s.TablePolicy, - TableExceptions: st.s.TableExceptions, - }, - Rules: st.rs, - }, - ) - case ctx.DBTypePgSQL: - ar = pgsql_anonymize.Init( - st.c, - st.pr, - pgsql_anonymize.InitOpts{ - Security: pgsql_anonymize.SecurityOpts{ - TablePolicy: st.s.TablePolicy, - TableExceptions: st.s.TableExceptions, - }, - Rules: st.rs, - }, - ) - default: - - st.l.WithFields(logrus.Fields{ - "details": "unknown database type", - }).Errorf("anonymize") - - return misc.ErrRuntime } go func() { - _, err := io.Copy(st.w, ar) - st.ch <- err + st.ch <- st.a.Run(st.c, st.w) }() return nil