-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun-auto-masklet.ps1
334 lines (305 loc) · 16.8 KB
/
run-auto-masklet.ps1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
param (
$sqlInstance = "localhost",
$sqlUser = "",
$sqlPassword = "",
$output = "C:/temp/auto-masklet",
$trustCert = $true,
$backupPath = "",
$databaseName = "Northwind",
[switch]$autoContinue,
[switch]$skipAuth
)
# Configuration
$sourceDb = "${databaseName}_FullRestore"
$targetDb = "${databaseName}_Subset"
$fullRestoreCreateScript = "$PSScriptRoot/helper_scripts/CreateNorthwindFullRestore.sql"
$subsetCreateScript = "$PSScriptRoot/helper_scripts/CreateNorthwindSubset.sql"
$installTdmClisScript = "$PSScriptRoot/helper_scripts/installTdmClis.ps1"
$helperFunctions = "$PSScriptRoot/helper_scripts/helper-functions.psm1"
$subsetterOptionsFile = "$PSScriptRoot\helper_scripts\rgsubset-options-northwind.json"
$winAuth = $true
$sourceConnectionString = ""
$targetConnectionString = ""
if (($sqlUser -like "") -and ($sqlPassword -like "")){
$sourceConnectionString = "`"server=$sqlInstance;database=$sourceDb;Trusted_Connection=yes;TrustServerCertificate=yes`""
$targetConnectionString = "`"server=$sqlInstance;database=$targetDb;Trusted_Connection=yes;TrustServerCertificate=yes`""
}
else {
$winAuth = $false
$SqlCredential = New-Object -TypeName System.Management.Automation.PSCredential -ArgumentList $sqlUser, (ConvertTo-SecureString $sqlPassword -AsPlainText -Force)
$sourceConnectionString = "server=$sqlInstance;database=$sourceDb;TrustServerCertificate=yes;User Id=$sqlUser;Password=$sqlPassword;"
$targetConnectionString = "server=$sqlInstance;database=$targetDb;TrustServerCertificate=yes;User Id=$sqlUser;Password=$sqlPassword;"
}
Write-Output "Configuration:"
Write-Output "- sqlInstance: $sqlInstance"
Write-Output "- databaseName: $databaseName"
Write-Output "- sourceDb: $sourceDb"
Write-Output "- targetDb: $targetDb"
Write-Output "- fullRestoreCreateScript: $fullRestoreCreateScript"
Write-Output "- subsetCreateScript: $subsetCreateScript"
Write-Output "- installTdmClisScript: $installTdmClisScript"
Write-Output "- helperFunctions: $helperFunctions"
Write-Output "- subsetterOptionsFile: $subsetterOptionsFile"
Write-Output "- Using Windows Auth: $winAuth"
Write-Output "- sourceConnectionString: $sourceConnectionString"
Write-Output "- targetConnectionString: $targetConnectionString"
Write-Output "- output: $output"
Write-Output "- trustCert: $trustCert"
Write-Output "- backupPath: $backupPath"
Write-Output ""
Write-Output "Initial setup:"
# Unblocking all files in thi repo (typically required if code is downloaded as zip)
Get-ChildItem -Path $PSScriptRoot -Recurse | Unblock-File
# Importing helper functions
Write-Output " Importing helper functions"
import-module $helperFunctions
$requiredFunctions = @(
"Install-Dbatools",
"New-SampleDatabases",
"Restore-StagingDatabasesFromBackup"
)
# Testing that all the required functions are available
$requiredFunctions | ForEach-Object {
if (-not (Get-Command $_ -ErrorAction SilentlyContinue)){
Write-Error " Error: Required function $_ not found. Please review any errors above."
exit
}
else {
Write-Output " $_ found."
}
}
# Installing/importing dbatools
Write-Output " Installing dbatools"
$dbatoolsInstalledSuccessfully = Install-Dbatools -autoContinue:$autoContinue -trustCert:$trustCert
if ($dbatoolsInstalledSuccessfully){
Write-Output " dbatools installed successfully"
}
else {
Write-Error " dbatools failed to install. Please review any errors above."
break
}
# Download/update rgsubset and rganonymize CLIs
Write-Output " Ensuring the following Redgate Test Data Manager CLIs are installed and up to date: rgsubset, rganonymize"
powershell -File $installTdmClisScript
# Refreshing the environment variables so that the new path is available
$env:Path = [System.Environment]::GetEnvironmentVariable("Path","Machine") + ";" + [System.Environment]::GetEnvironmentVariable("Path","User")
# Verifying that the CLIs are both available
$rganonymizeExe = (Get-Command rganonymize).Source
$rgsubsetExe = (Get-Command rgsubset).Source
if (-not $rganonymizeExe){
Write-Warning "Warning: Failed to install rganonymize."
}
if (-not $rgsubsetExe) {
Write-Warning "Warning: Failed to install rgsubset."
}
if (-not ($rganonymizeExe -and $rgsubsetExe)){
Write-Error "Error: rgsubset and/or rganonymize CLIs not found. This script should have installed them. Please review any errors/warnings above."
break
}
# Start trial
if (-not $skipAuth){
Write-Output " Authorizing rgsubset, and starting a trial (if not already started):"
Write-Output " rgsubset auth login --i-agree-to-the-eula --start-trial"
rgsubset auth login --i-agree-to-the-eula --start-trial
Write-Output " Authorizing rganonymize:"
Write-Output " rganonymize auth login --i-agree-to-the-eula"
rganonymize auth login --i-agree-to-the-eula
}
# Logging the CLI versions for reference
Write-Output ""
Write-Output "rgsubset version is:"
rgsubset --version
Write-Output "rganonymize version is:"
rganonymize --version
Write-Output ""
# Building staging databases
if ($backupPath) {
# Using the Restore-StagingDatabasesFromBackup function in helper-functions.psm1 to build source and target databases from an existing backup
Write-Output " Building $sourceDb and $targetDb databases from backup file saved at $BackupPath."
$dbCreateSuccessful = Restore-StagingDatabasesFromBackup -WinAuth:$winAuth -sqlInstance:$sqlInstance -sourceDb:$sourceDb -targetDb:$targetDb -sourceBackupPath:$backupPath -SqlCredential:$SqlCredential
if ($dbCreateSuccessful){
Write-Output " Source and target databases created successfully."
}
else {
Write-Error " Error: Failed to create the source and target databases. Please review any errors above."
break
}
}
else {
# Using the Build-SampleDatabases function in helper-functions.psm1, and provided sql create scripts, to build sample source and target databases
Write-Output " Building sample Northwind source and target databases."
$dbCreateSuccessful = New-SampleDatabases -WinAuth:$winAuth -sqlInstance:$sqlInstance -sourceDb:$sourceDb -targetDb:$targetDb -fullRestoreCreateScript:$fullRestoreCreateScript -subsetCreateScript:$subsetCreateScript -SqlCredential:$SqlCredential
if ($dbCreateSuccessful){
Write-Output " Source and target databases created successfully."
}
else {
Write-Error " Error: Failed to create the source and target databases. Please review any errors above."
break
}
}
# Clean output directory
Write-Output " Cleaning the output directory at: $output"
if (Test-Path $output){
Write-Output " Recursively deleting the existing output directory, and any files from previous runs."
Remove-Item -Recurse -Force $output | Out-Null
}
Write-Output " Creating a clean output directory."
New-Item -ItemType Directory -Path $output | Out-Null
Write-Output ""
Write-Output "*********************************************************************************************************"
Write-Output "Observe:"
Write-Output "There should now be two databases on the $sqlInstance server: $sourceDb and $targetDb"
Write-Output "$sourceDb should contain some data"
if ($backupPath){
Write-Output "$targetDb should be identical. In an ideal world, it would be schema identical, but empty of data."
}
else {
Write-Output "$targetDb should have an identical schema, but no data"
Write-Output ""
Write-Output "For example, you could run the following script in your prefered IDE:"
Write-Output ""
Write-Output " USE $sourceDb"
Write-Output " --USE $targetDb -- Uncomment to run the same query on the target database"
Write-Output " "
Write-Output " SELECT COUNT (*) AS TotalOrders"
Write-Output " FROM dbo.Orders;"
Write-Output " "
Write-Output " SELECT TOP 20 o.OrderID AS 'o.OrderId' ,"
Write-Output " o.CustomerID AS 'o.CustomerID' ,"
Write-Output " o.ShipAddress AS 'o.ShipAddress' ,"
Write-Output " o.ShipCity AS 'o.ShipCity' ,"
Write-Output " c.Address AS 'c.Address' ,"
Write-Output " c.City AS 'c.ShipCity'"
Write-Output " FROM dbo.Customers c"
Write-Output " JOIN dbo.Orders o ON o.CustomerID = c.CustomerID"
Write-Output " ORDER BY o.OrderID ASC;"
}
Write-Output ""
Write-Output "Next:"
Write-Output "We will run the following rgsubset command to copy a subset of the data from $sourceDb to $targetDb."
if ($backupPath){
Write-Output " rgsubset run --database-engine=sqlserver --source-connection-string=$sourceConnectionString --target-connection-string=$targetConnectionString --target-database-write-mode Overwrite"
}
else {
Write-Output " rgsubset run --database-engine=sqlserver --source-connection-string=$sourceConnectionString --target-connection-string=$targetConnectionString --options-file `"$subsetterOptionsFile`" --target-database-write-mode Overwrite"
Write-Output "The subset will include data from the starting table, based on the options set here: $subsetterOptionsFile."
}
Write-Output "*********************************************************************************************************"
Write-Output ""
if (-not $autoContinue){
$continue = Read-Host "Continue? (y/n)"
if ($continue -notlike "y"){
Write-output 'Response not like "y". Teminating script.'
break
}
}
# running subset
Write-Output ""
Write-Output "Running rgsubset to copy a subset of the data from $sourceDb to $targetDb."
if ($backupPath){
rgsubset run --database-engine=sqlserver --source-connection-string=$sourceConnectionString --target-connection-string=$targetConnectionString --target-database-write-mode Overwrite
}
else {
rgsubset run --database-engine=sqlserver --source-connection-string=$sourceConnectionString --target-connection-string=$targetConnectionString --options-file="$subsetterOptionsFile" --target-database-write-mode Overwrite
}
Write-Output ""
Write-Output "*********************************************************************************************************"
Write-Output "Observe:"
Write-Output "$targetDb should contain a subset of the data from $sourceDb."
Write-Output ""
Write-Output "Next:"
Write-Output "We will run rganonymize classify to create a classification.json file, documenting the location of any PII:"
Write-Output " rganonymize classify --database-engine SqlServer --connection-string $targetConnectionString --classification-file `"$output\classification.json`" --output-all-columns"
Write-Output "*********************************************************************************************************"
Write-Output ""
if (-not $autoContinue){
$continue = Read-Host "Continue? (y/n)"
if ($continue -notlike "y"){
Write-output 'Response not like "y". Teminating script.'
break
}
}
Write-Output "Creating a classification.json file in $output"
rganonymize classify --database-engine SqlServer --connection-string=$targetConnectionString --classification-file "$output\classification.json" --output-all-columns
Write-Output ""
Write-Output "*********************************************************************************************************"
Write-Output "Observe:"
Write-Output "Review the classification.json file save at: $output"
Write-Output "This file documents any PII that has been found automatically in the $targetDb database."
Write-Output "You can tweak this file as necessary and keep it in source control to inform future masking runs."
Write-Output "You could even create CI builds that cross reference this file against your database source code,"
Write-Output " to ensure developers always add appropriate classifications for new columns before they get"
Write-Output " deployed to production."
Write-Output ""
Write-Output "Next:"
Write-Output "We will run the rganonymize map command to create a masking.json file, defining how the PII will be masked:"
Write-Output " rganonymize map --classification-file `"$output\classification.json`" --masking-file `"$output\masking.json`""
Write-Output "*********************************************************************************************************"
Write-Output ""
if (-not $autoContinue){
$continue = Read-Host "Continue? (y/n)"
if ($continue -notlike "y"){
Write-output 'Response not like "y". Teminating script.'
break
}
}
Write-Output "Creating a masking.json file based on contents of classification.json in $output"
rganonymize map --classification-file="$output\classification.json" --masking-file="$output\masking.json"
Write-Output ""
Write-Output "*********************************************************************************************************"
Write-Output "Observe:"
Write-Output "Review the masking.json file save at: $output"
Write-Output "This file defines how the PII found in the $targetDb database will be masked."
Write-Output "You can save this in source control, and set up an automated masking job to"
Write-Output " create a fresh masked copy, with the latest data, on a nightly or weekly"
Write-Output " basis, or at an appropriate point in your sprint/release cycle."
Write-Output ""
Write-Output "Next:"
Write-Output "We will run the rganonymize mask command to mask the PII in ${targetDb}:"
Write-Output " rganonymize mask --database-engine SqlServer --connection-string $targetConnectionString --masking-file `"$output\masking.json`""
Write-Output "*********************************************************************************************************"
Write-Output ""
if (-not $autoContinue){
$continue = Read-Host "Continue? (y/n)"
if ($continue -notlike "y"){
Write-output 'Response not like "y". Teminating script.'
break
}
}
Write-Output "Masking target database, based on contents of masking.json file in $output"
rganonymize mask --database-engine SqlServer --connection-string=$targetConnectionString --masking-file="$output\masking.json"
Write-Output ""
Write-Output "*********************************************************************************************************"
Write-Output "Observe:"
Write-Output "The data in the $targetDb database should now be masked."
Write-Output "Review the data in the $sourceDb and $targetDb databases. Are you happy with the way they have been subsetted and masked?"
Write-Output "Things you may like to look out for:"
Write-Output " - Notes fields (e.g. Employees.Notes)"
Write-Output " - Dependencies (e.g. If using the sample Northwind database, observer the Orders.ShipAddress and Customers.Address, joined on the CustoemrID column in each table"
Write-Output ""
Write-Output "Additional tasks:"
Write-Output "Review both rgsubset-options.json examples in ./helper_scripts, as well as this documentation about using options files:"
Write-Output " https://documentation.red-gate.com/testdatamanager/command-line-interface-cli/subsetting/subsetting-configuration/subsetting-configuration-file"
Write-Output "To apply a more thorough mask on the notes fields, review this documentation, and configure this project to a Lorem Ipsum"
Write-Output " masking rule for any 'notes' fields:"
Write-Output " - Default classifications and datasets:"
Write-Output " https://documentation.red-gate.com/testdatamanager/command-line-interface-cli/anonymization/default-classifications-and-datasets"
Write-Output " - Applying custom classification rules:"
Write-Output " https://documentation.red-gate.com/testdatamanager/command-line-interface-cli/anonymization/custom-configuration/classification-rules"
Write-Output " - Using different or custom data sets:"
Write-Output " https://documentation.red-gate.com/testdatamanager/command-line-interface-cli/anonymization/custom-configuration/using-different-or-custom-datasets"
Write-Output ""
Write-Output "Once you have verified that all the PII has been removed, you can backup this version of"
Write-output " the database, and share it with your developers for dev/test purposes."
Write-Output ""
Write-Output "************************************** FINISHED! **************************************"
Write-Output ""
Write-Output "CONGRATULATIONS!"
Write-Output "You've completed a minimal viable Test Data Manager proof of concept."
Write-Output "Next, review the following resources:"
Write-Output " - Documentation: https://documentation.red-gate.com/testdatamanager/command-line-interface-cli"
Write-Output " - Training: https://www.red-gate.com/hub/university/courses/test-data-management/cloning/overview/introduction-to-tdm"
Write-Output "Can you subset and mask one of your own databases?"
Write-Output ""
Write-Output "Want to learn more? If you have a Redgate account manager, they can help you get started."
Write-Output "Otherwise, email us, and let's start a conversation: [email protected]"