From d6096c9ae2117b7657b50a545cd8efddfdb62cdf Mon Sep 17 00:00:00 2001
From: "David W. Streever"
Date: Thu, 1 Jun 2023 03:31:16 -0400
Subject: [PATCH] - Auto-Tuning

- Introduction of basic stats regarding file counts/sizes for large tables. We'll adjust `DISTRIBUTE BY` and Tez groupings to provide more efficient/balanced SQL migrations, with better-optimized file sizes after migration. https://github.com/cloudera-labs/hms-mirror/issues/53
- Additional table filters (`-tfs|--table-filter-size-limit` and `-tfp|--table-filter-partition-count-limit`) that check a table's data size and partition count can also be applied to narrow the range of tables you'll process. https://github.com/cloudera-labs/hms-mirror/issues/55
- Add a property to tables migrated with "STORAGE_MIGRATION" to identify and filter them out of future runs. https://github.com/cloudera-labs/hms-mirror/issues/56
- `-cto|--compress-text-output` option and additional session-level settings using basic stats.
- Handle the HDP3 scenario that doesn't support the MANAGEDLOCATION element in database properties. https://github.com/cloudera-labs/hms-mirror/issues/52
- AVRO Schema Only fix. https://github.com/cloudera-labs/hms-mirror/issues/58
- Clean up messaging around legacy config settings.
- Fix/Added the `dbRegEx` command line parameter: https://github.com/cloudera-labs/hms-mirror/issues/57

Configuration Breaking Change: if you see a note about `A configuration element is no longer valid, progress. Please remove the element from the configuration yaml and try again.` with `Caused by: com.fasterxml.jackson.databind.exc.UnrecognizedPropertyException: Unrecognized field "tblRegEx"`, please remove the properties `dbRegEx`, `tblRegEx`, and `tblExcludeRegEx` from the config yaml.
--- README.md | 586 +++++++++++------- pom.xml | 2 +- .../cloudera/utils/hadoop/hms/Context.java | 33 + .../com/cloudera/utils/hadoop/hms/Mirror.java | 105 +++- .../utils/hadoop/hms/mirror/Cluster.java | 202 ++++-- .../utils/hadoop/hms/mirror/Config.java | 81 +-- .../utils/hadoop/hms/mirror/Conversion.java | 52 +- .../utils/hadoop/hms/mirror/DBMirror.java | 15 +- .../hadoop/hms/mirror/EnvironmentTable.java | 13 +- .../utils/hadoop/hms/mirror/Filter.java | 85 +++ .../utils/hadoop/hms/mirror/MirrorConf.java | 166 ++--- .../utils/hadoop/hms/mirror/Optimization.java | 35 +- .../utils/hadoop/hms/mirror/SerdeType.java | 34 + .../utils/hadoop/hms/mirror/SessionVars.java | 18 + .../hadoop/hms/mirror/StatsCalculator.java | 108 ++++ .../utils/hadoop/hms/mirror/TableMirror.java | 145 +++-- .../hadoop/hms/mirror/TablePropertyVars.java | 54 ++ .../mirror/feature/BadParquetDefFeature.java | 5 +- .../hadoop/hms/stage/GetTableMetadata.java | 4 +- .../utils/hadoop/hms/stage/Setup.java | 44 ++ .../utils/hadoop/hms/stage/Transfer.java | 54 +- .../utils/hadoop/hms/util/TableUtils.java | 225 ++++--- .../cloudera/utils/hadoop/hms/DataState.java | 46 +- .../hadoop/hms/EncryptValidationTest.java | 72 +-- .../utils/hadoop/hms/MirrorTest01.java | 8 + .../cloudera/utils/hadoop/hms/TestSQL.java | 2 +- .../{ => datastrategy}/AVROMigrationTest.java | 26 +- .../ConfigValidationTest.java | 36 +- .../ConfigValidationTest01.java | 32 +- .../DistcpMigrationTest.java | 24 +- .../hms/{ => datastrategy}/DumpTest.java | 25 +- .../ExpImpDataMigrationTest.java | 26 +- .../HybridDataMigrationTest.java | 26 +- .../LegacyConfigValidationTest.java | 138 +---- .../LegacyDistcpMigrationTest.java | 25 +- .../LegacyExpImpDataMigrationTest.java | 26 +- .../LegacyHybridDataMigrationTest.java | 26 +- .../LegacySQLDataMigrationTest.java | 48 +- .../LegacySchemaMigrationTest.java | 29 +- .../{ => datastrategy}/MirrorTestBase.java | 49 +- .../SQLDataMigrationTest.java | 26 +- .../SchemaMigrationTest.java | 24 +- .../StorageMigrationTest.java | 26 +- .../StructEscapeFieldsFeatureTest.java | 108 ++++ .../utils/hadoop/hms/util/TableUtilsTest.java | 206 +++++- 45 files changed, 1841 insertions(+), 1279 deletions(-) create mode 100644 src/main/java/com/cloudera/utils/hadoop/hms/Context.java create mode 100644 src/main/java/com/cloudera/utils/hadoop/hms/mirror/Filter.java create mode 100644 src/main/java/com/cloudera/utils/hadoop/hms/mirror/SerdeType.java create mode 100644 src/main/java/com/cloudera/utils/hadoop/hms/mirror/SessionVars.java create mode 100644 src/main/java/com/cloudera/utils/hadoop/hms/mirror/StatsCalculator.java create mode 100644 src/main/java/com/cloudera/utils/hadoop/hms/mirror/TablePropertyVars.java rename src/test/java/com/cloudera/utils/hadoop/hms/{ => datastrategy}/AVROMigrationTest.java (87%) rename src/test/java/com/cloudera/utils/hadoop/hms/{ => datastrategy}/ConfigValidationTest.java (93%) rename src/test/java/com/cloudera/utils/hadoop/hms/{ => datastrategy}/ConfigValidationTest01.java (83%) rename src/test/java/com/cloudera/utils/hadoop/hms/{ => datastrategy}/DistcpMigrationTest.java (91%) rename src/test/java/com/cloudera/utils/hadoop/hms/{ => datastrategy}/DumpTest.java (73%) rename src/test/java/com/cloudera/utils/hadoop/hms/{ => datastrategy}/ExpImpDataMigrationTest.java (92%) rename src/test/java/com/cloudera/utils/hadoop/hms/{ => datastrategy}/HybridDataMigrationTest.java (92%) rename src/test/java/com/cloudera/utils/hadoop/hms/{ => datastrategy}/LegacyConfigValidationTest.java (52%) rename 
src/test/java/com/cloudera/utils/hadoop/hms/{ => datastrategy}/LegacyDistcpMigrationTest.java (91%) rename src/test/java/com/cloudera/utils/hadoop/hms/{ => datastrategy}/LegacyExpImpDataMigrationTest.java (88%) rename src/test/java/com/cloudera/utils/hadoop/hms/{ => datastrategy}/LegacyHybridDataMigrationTest.java (93%) rename src/test/java/com/cloudera/utils/hadoop/hms/{ => datastrategy}/LegacySQLDataMigrationTest.java (86%) rename src/test/java/com/cloudera/utils/hadoop/hms/{ => datastrategy}/LegacySchemaMigrationTest.java (94%) rename src/test/java/com/cloudera/utils/hadoop/hms/{ => datastrategy}/MirrorTestBase.java (88%) rename src/test/java/com/cloudera/utils/hadoop/hms/{ => datastrategy}/SQLDataMigrationTest.java (94%) rename src/test/java/com/cloudera/utils/hadoop/hms/{ => datastrategy}/SchemaMigrationTest.java (95%) rename src/test/java/com/cloudera/utils/hadoop/hms/{ => datastrategy}/StorageMigrationTest.java (91%) diff --git a/README.md b/README.md index 8636d71c..d984c75a 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,8 @@ The output reports are written in [Markdown](https://www.markdownguide.org/). I * [`--intermediate-storage` Workflow Patterns and Where to Run From](#--intermediate-storage-workflow-patterns-and-where-to-run-from) * [`--common-storage` Workflow Patterns and Where to Run From](#--common-storage-workflow-patterns-and-where-to-run-from) - [Features](#features) + * [Optimizations](#optimizations) + * [Compress Text Output](#compress-text-output) * [VIEWS](#views) * [ACID Tables](#acid-tables) * [Intermediate/Common Storage Options](#intermediatecommon-storage-options) @@ -37,7 +39,6 @@ The output reports are written in [Markdown](https://www.markdownguide.org/). I * [Shared Storage Models (Isilon, Spectrum-Scale, etc.)](#shared-storage-models-isilon-spectrum-scale-etc) * [Disconnected Mode](#disconnected-mode) * [No-Purge Option](#no-purge-option) - * [Skip Optimizations](#skip-optimizations) * [Property Overrides](#property-overrides) * [Global Location Map](#global-location-map) * [Force External Locations](#force-external-locations) @@ -47,7 +48,7 @@ The output reports are written in [Markdown](https://www.markdownguide.org/). I * [HMS-Mirror Setup from Binary Distribution](#hms-mirror-setup-from-binary-distribution) * [Quick Start](#quick-start) * [General Guidance](#general-guidance) -- [Optimizations](#optimizations) +- [Optimizations](#optimizations-1) * [Controlling the YARN Queue that runs the SQL queries from `hms-mirror`](#controlling-the-yarn-queue-that-runs-the-sql-queries-from-hms-mirror) * [Make Backups before running `hms-mirror`](#make-backups-before-running-hms-mirror) * [Isolate Migration Activities](#isolate-migration-activities) @@ -210,6 +211,61 @@ For On-Prem to Cloud migrations, we typically use a `PUSH` model because that is Under certain conditions, `hms-mirror` will 'move' data too. Using the data strategies `-d SQL|EXPORT_IMPORT|HYBRID` well use a combination of SQL temporary tables and [Linking Clusters Storage Layers](#linking-clusters-storage-layers) to facilitate this. +### Optimizations + +The following configuration settings control the various optimizations taken by `hms-mirror`. These settings are mutually exclusive. 
+
+- `-at|--auto-tune`
+- `-so|--skip-optimizations`
+- `-sdpi|--sort-dynamic-partition-inserts`
+
+#### Auto-Tune
+
+`-at|--auto-tune`
+
+Auto-tuning will use some basic file-level statistics about tables/partitions to provide overrides for the following settings:
+
+- `tez.grouping.max-size`
+- `hive.exec.max.dynamic.partitions`
+- `hive.exec.reducers.max`
+
+In addition to these session-level settings, we'll use those basic file statistics to construct migration scripts that address things like 'small-files' and 'large' partition datasets.
+
+We'll set `hive.optimize.sort.dynamic.partition.threshold=-1` and append `DISTRIBUTE BY` to the SQL migration statement, just like we do with `-sdpi`. But we'll go one step further: we'll review the average partition size and add an additional 'grouping' element to the SQL to ensure we get efficient writers for a partition. This means that tables with large partition datasets will have more than the standard single writer per partition, preventing the long-running, hanging task that would otherwise be writing a very large partition. (An illustrative sketch of these settings appears after the VIEWS section below.)
+
+#### Sort Dynamic Partition Inserts
+
+`-sdpi|--sort-dynamic-partition-inserts`
+
+This will set the session property `hive.optimize.sort.dynamic.partition.threshold=0`, which enables plans that distribute multi-partition inserts by the partition key, reducing each partition's writes to a single 'writer/reducer'.
+
+When this isn't set, we set `hive.optimize.sort.dynamic.partition.threshold=-1` and append `DISTRIBUTE BY` to the SQL migration statement to ensure the same behavior of grouping reducers by partition values.
+
+#### Skip Optimizations
+
+`-so`
+
+[Feature Request #23](https://github.com/cloudera-labs/hms-mirror/issues/23) was introduced in v1.5.4.2 and gives an option to **Skip Optimizations**.
+
+When migrating data via SQL with partitioned tables (OR downgrading an ACID table), there are optimizations that we apply to help Hive distribute data more efficiently. One method is to use `hive.optimize.sort.dynamic.partition=true`, which will "DISTRIBUTE" data along the partitions via a reduce task. Another is to declare this in SQL with a `DISTRIBUTE BY` clause.
+
+But there is a corner case where these optimizations can get in the way and cause long-running tasks. If the source table has already been organized into large files (which would be within the partitions already), adding the optimizations above forces a single reducer per partition. If the partitions are large and already have good file sizes, we want to skip these optimizations and let Hive run the process with only a map task.
+
+### HDP3 MANAGEDLOCATION Database Property
+
+[HDP3 doesn't support MANAGEDLOCATION](https://github.com/cloudera-labs/hms-mirror/issues/52), so we've added a property to the cluster configuration to allow the system to *SKIP* setting the `MANAGEDLOCATION` database property in HDP 3 / Hive 3 environments.
+
+```yaml
+clusters:
+  LEFT:
+    legacyHive: false
+    hdpHive3: true
+```
+
+### Compress Text Output
+
+`-cto` controls the session-level setting for `hive.exec.compress.output`.
+
 ### VIEWS
 
 `hms-mirror` now supports the migration of VIEWs between two environments. Use the `-v|--views-only` option to execute this path. VIEW creation requires dependent tables to exist.
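+To make the optimization options above concrete, here's a hedged sketch of the *style* of session settings and transfer SQL they influence. The database/table names, partition column, and setting values below are purely illustrative assumptions, not actual `hms-mirror` output; the real values are derived from the basic file statistics gathered for each table.
+
+```sql
+-- Illustrative session overrides (example values only; with -at they are
+-- derived from the table's file/partition statistics).
+SET hive.optimize.sort.dynamic.partition.threshold=-1;
+SET tez.grouping.max-size=134217728;        -- ~128MB, sized from the average file size
+SET hive.exec.max.dynamic.partitions=2000;  -- sized from the discovered partition count
+SET hive.exec.compress.output=true;         -- only when -cto is specified
+
+-- Prescriptive distribution of the transfer, per the -sdpi / auto-tune notes above.
+INSERT OVERWRITE TABLE target_db.web_logs PARTITION (event_dt)
+SELECT *
+FROM transfer_db.web_logs_shadow
+DISTRIBUTE BY event_dt;
+```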
@@ -344,7 +400,9 @@ If you do NOT want to apply this translation, add the option `-slt|--skip-legacy-translation`
 There are options to filter tables included in the `hms-mirror` process.
 
 You can select `-tf|--table-filter` to "include" only tables that match this 'regular-expression'. Inversely, use `-tef|--table-exclude-filter` to omit tables from the list. These options are mutually exclusive.
 
-The filter is expressed as a 'regular-expression'. Complex expressions should be enclosed in quotes to ensure the commandline interpreter doesn't split them.
+The filters for `-tf` and `-tef` are expressed as 'regular-expressions'. Complex expressions should be enclosed in quotes to ensure the commandline interpreter doesn't split them.
+
+Additional table filters (`-tfs|--table-filter-size-limit` and `-tfp|--table-filter-partition-count-limit`) that check a table's data size and partition count can also be applied to narrow the range of tables you'll process.
 
 The filter does NOT override the requirement for options like `-ma|-mao`. It is used as an additional filter.
 
@@ -382,16 +440,6 @@ Note: This will be know as the "right-is-disconnected" option. Which means the p
 [Feature Request #25](https://github.com/cloudera-labs/hms-mirror/issues/25) was introduced in v1.5.4.2 and gives the user the option to remove the `external.table.purge` option that is added when converting legacy managed tables to external tables (Hive 1/2 to 3). This does affect the behavior of the table from the older platforms.
 
-### Skip Optimizations
-
-`-so`
-
-[Feature Request #23](https://github.com/cloudera-labs/hms-mirror/issues/23) was introduced in v1.5.4.2 and give an option to **Skip Optimizations**.
-
-When migrating data via SQL with partitioned tables (OR downgrading an ACID table), there are optimizations that we apply to help hive distribute data more efficiently. One method is to use `hive.optimize.sort.dynamic.partition=true` which will "DISTRIBUTE" data along the partitions via a Reduction task. Another is to declare this in SQL with a `DISTRIBUTE BY` clause.
-
-But there is a corner case where these optimizations can get in the way and cause long-running tasks. If the source table has already been organized into large files (which would be within the partitions already), adding the optimizations above force a single reducer per partition. If the partitions are large and already have good file sizes, we want to skip these optimizations and let hive run the process with only a map task.
-
 ### Property Overrides
 
 `-po[l|r] <key=value>[,<key=value>]...`
 
@@ -524,6 +572,8 @@ Use the jdbc url defined in `default.yaml` to set a queue.
 
 `jdbc:hive2://host:10000/.....;...?tez.queue.name=batch`
 
+The commandline properties `-po`, `-pol`, and `-por` can be used to override the queue name as well. For example: `-pol tez.queue.name=batch` will set the queue for the "LEFT" cluster, while `-por tez.queue.name=migration` will set the queue for the "RIGHT" cluster.
+
 ### Make Backups before running `hms-mirror`
 
 Take snapshots of areas you'll touch:
@@ -532,7 +582,6 @@ Take snapshots of areas you'll touch:
 > NOTE: If you are testing and "DROPPING" dbs, snapshots of those data directories could protect you from accidental deletions if you don't manage purge options correctly. Don't skip this...
 > A snapshot of the db directory on HDFS will prevent `DROP DATABASE x CASCADE` from removing the DB directory (observed in CDP 7.1.4+ as tested, check your version) and all sub-directories even though tables were NOT configured with `purge` options.
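+As a minimal illustration of why the snapshot matters (the database name is hypothetical), this is the kind of cleanup that would otherwise remove the database directory, and all of its data, along with the schemas:
+
+```sql
+-- With an HDFS snapshot taken on the database directory, this still drops the
+-- schemas, but the snapshotted directory tree (and the data under it) survives.
+DROP DATABASE my_test_db CASCADE;
+```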
-
 ### Isolate Migration Activities
 
 The migration of schemas can put a heavy load on HS2 and the HMS server it's using. That impact can manifest itself as 'pauses' for other clients trying to run queries. Extended schema/discovery operations have a 'blocking' tendency in HS2.
 
@@ -588,6 +637,14 @@ The default batch size for partition discovery via `msck` is 3000. Adjustments
 
 ## Pre-Requisites
 
+### Hive/TEZ Properties Whitelist Requirements
+
+HiveServer2 has restrictions on what properties can be set by the user in a session. To ensure `hms-mirror` can apply the session-level settings it needs, add the [`hive.security.authorization.sqlstd.confwhitelist.append`](https://cwiki.apache.org/confluence/display/Hive/Configuration+Properties#ConfigurationProperties-hive.security.authorization.sqlstd.confwhitelist.append) property to the HiveServer2 Advanced Configuration Snippet (Safety Valve) for `hive-site.xml` with at least the following value(s):
+
+```
+tez\.grouping\..*
+```
+
 ### Backups
 
 DO NOT SKIP THIS!!!
 
@@ -845,221 +902,294 @@ When you do need to move data, `hms-mirror` create a workbook of 'source' and 't
 
 ```
 usage: hms-mirror
-                        version:1.5.5.x
+                        version:1.5.6.0+
 Hive Metastore Migration Utility
-
- -accept,--accept Accept ALL confirmations and silence prompts
- -ap,--acid-partition-count Set the limit of partitions that the ACID strategy will work with. '-1' means no-limit.
- -asm,--avro-schema-migration Migrate AVRO Schema Files referenced in TBLPROPERTIES by 'avro.schema.url'. Without migration it is expected that the file will exist on the other cluster and match the 'url' defined in the schema DDL. If it's not present, schema creation will FAIL. Specifying this option REQUIRES the LEFT and RIGHT cluster to be LINKED. See docs: https://github.com/cloudera-labs/hms-mirror#linking-clusters-storage-layers
- -cfg,--config Config with details for the HMS-Mirror. Default: $HOME/.hms-mirror/cfg/default.yaml
- -cs,--common-storage Common Storage used with Data Strategy HYBRID, SQL, EXPORT_IMPORT. This will change the way these methods are implemented by using the specified storage location as an 'common' storage point between two clusters. In this case, the cluster do NOT need to be 'linked'. Each cluster DOES need to have access to the location and authorization to interact with the location. This may mean additional configuration requirements for 'hdfs' to ensure this seamless access.
- -d,--data-strategy Specify how the data will follow the schema. [DUMP, SCHEMA_ONLY, LINKED, SQL, EXPORT_IMPORT, HYBRID, CONVERT_LINKED, STORAGE_MIGRATION, COMMON]
- -da,--downgrade-acid Downgrade ACID tables to EXTERNAL tables with purge.
- -db,--database Comma separated list of Databases (upto 100).
- -dbo,--database-only Migrate the Database definitions as they exist from LEFT to RIGHT
- -dbp,--db-prefix Optional: A prefix to add to the RIGHT cluster DB Name. Usually used for testing.
- -dbr,--db-rename Optional: Rename target db to ... This option is only valid when '1' database is listed in `-db`.
- -dc,--distcp Build the 'distcp' workplans. Optional argument (PULL, PUSH) to define which cluster is running the distcp commands. Default is PULL.
- -dp,--decrypt-password Used this in conjunction with '-pkey' to decrypt the generated passcode from `-p`.
- -ds,--dump-source Specify which 'cluster' is the source for the DUMP strategy (LEFT|RIGHT).
- -e,--execute Execute actions request, without this flag the process is a dry-run.
- -ep,--export-partition-count Set the limit of partitions that the - EXPORT_IMPORT strategy will work with. - -ewd,--external-warehouse-directory The external warehouse directory path. Should - not include the namespace OR the database - directory. This will be used to set the LOCATION - database option. - -f,--flip Flip the definitions for LEFT and RIGHT. Allows - the same config to be used in reverse. - -fel,--force-external-location Under some conditions, the LOCATION element for - EXTERNAL tables is removed (ie: -rdl). In which - case we rely on the settings of the database - definition to control the EXTERNAL table data - location. But for some older Hive versions, the - LOCATION element in the database is NOT honored. - Even when the database LOCATION is set, the - EXTERNAL table LOCATION defaults to the system - wide warehouse settings. This flag will ensure - the LOCATION element remains in the CREATE - definition of the table to force it's location. - -glm,--global-location-map Comma separated key=value pairs of Locations to - Map. IE: /myorig/data/finance=/data/ec/finance. - This reviews 'EXTERNAL' table locations for the - path '/myorig/data/finance' and replaces it with - '/data/ec/finance'. Option can be used alone or - with -rdl. Only applies to 'EXTERNAL' tables and - if the tables location doesn't contain one of the - supplied maps, it will be translated according to - -rdl rules if -rdl is specified. If -rdl is not - specified, the conversion for that table is - skipped. - -h,--help Help - -ip,--in-place Downgrade ACID tables to EXTERNAL tables with - purge. - -is,--intermediate-storage Intermediate Storage used with Data Strategy - HYBRID, SQL, EXPORT_IMPORT. This will change the - way these methods are implemented by using the - specified storage location as an intermediate - transfer point between two clusters. In this - case, the cluster do NOT need to be 'linked'. - Each cluster DOES need to have access to the - location and authorization to interact with the - location. This may mean additional configuration - requirements for 'hdfs' to ensure this seamless - access. - -ma,--migrate-acid Migrate ACID tables (if strategy allows). - Optional: ArtificialBucketThreshold count that - will remove the bucket definition if it's below - this. Use this as a way to remove artificial - bucket definitions that were added 'artificially' - in legacy Hive. (default: 2) - -mao,--migrate-acid-only Migrate ACID tables ONLY (if strategy allows). - Optional: ArtificialBucketThreshold count that - will remove the bucket definition if it's below - this. Use this as a way to remove artificial - bucket definitions that were added 'artificially' - in legacy Hive. (default: 2) - -mnn,--migrate-non-native Migrate Non-Native tables (if strategy allows). - These include table definitions that rely on - external connection to systems like: HBase, - Kafka, JDBC - -mnno,--migrate-non-native-only Migrate Non-Native tables (if strategy allows). - These include table definitions that rely on - external connection to systems like: HBase, - Kafka, JDBC - -np,--no-purge For SCHEMA_ONLY, COMMON, and LINKED data - strategies set RIGHT table to NOT purge on DROP - -o,--output-dir Output Directory (default: - $HOME/.hms-mirror/reports/ - -p,--password Used this in conjunction with '-pkey' to generate - the encrypted password that you'll add to the - configs for the JDBC connections. - -pkey,--password-key The key used to encrypt / decrypt the cluster - jdbc passwords. 
If not present, the passwords - will be processed as is (clear text) from the - config file. - -po,--property-overrides Comma separated key=value pairs of Hive - properties you wish to set/override. - -pol,--property-overrides-left Comma separated key=value pairs of Hive - properties you wish to set/override for LEFT - cluster. - -por,--property-overrides-right Comma separated key=value pairs of Hive - properties you wish to set/override for RIGHT - cluster. - -q,--quiet Reduce screen reporting output. Good for - background processes with output redirects to a - file - -rdl,--reset-to-default-location Strip 'LOCATION' from all target cluster - definitions. This will allow the system defaults - to take over and define the location of the new - datasets. - -rid,--right-is-disconnected Don't attempt to connect to the 'right' cluster - and run in this mode - -ro,--read-only For SCHEMA_ONLY, COMMON, and LINKED data - strategies set RIGHT table to NOT purge on DROP. - Intended for use with replication distcp - strategies and has restrictions about existing - DB's on RIGHT and PATH elements. To simply NOT - set the purge flag for applicable tables, use - -np. - -rr,--reset-right Use this for testing to remove the database on - the RIGHT using CASCADE. - -s,--sync For SCHEMA_ONLY, COMMON, and LINKED data - strategies. Drop and Recreate Schema's when - different. Best to use with RO to ensure - table/partition drops don't delete data. When - used WITHOUT `-tf` it will compare all the tables - in a database and sync (bi-directional). Meaning - it will DROP tables on the RIGHT that aren't in - the LEFT and ADD tables to the RIGHT that are - missing. When used with `-ro`, table schemas can - be updated by dropping and recreating. When used - with `-tf`, only the tables that match the filter - (on both sides) will be considered. - -sdpi,--sort-dynamic-partition-inserts Used to set - `hive.optimize.sort.dynamic.partition` in TEZ for - optimal partition inserts. When not specified, - will use prescriptive sorting by adding - 'DISTRIBUTE BY' to transfer SQL. default: false - -sf,--skip-features Skip Features evaluation. - -slc,--skip-link-check Skip Link Check. Use when going between or to - Cloud Storage to avoid having to configure - hms-mirror with storage credentials and - libraries. This does NOT preclude your Hive - Server 2 and compute environment from such - requirements. - -slt,--skip-legacy-translation Skip Schema Upgrades and Serde Translations - -smn,--storage-migration-namespace Optional: Used with the 'data strategy - STORAGE_MIGRATION to specify the target - namespace. - -so,--skip-optimizations Skip any optimizations during data movement, like - dynamic sorting or distribute by - -sp,--sql-partition-count Set the limit of partitions that the SQL strategy - will work with. '-1' means no-limit. - -sql,--sql-output . This option is no longer required - to get SQL out in a report. That is the default - behavior. - -su,--setup Setup a default configuration file through a - series of questions - -tef,--table-exclude-filter Filter tables (excludes) with name matching - RegEx. Comparison done with 'show tables' - results. Check case, that's important. Hive - tables are generally stored in LOWERCASE. Make - sure you double-quote the expression on the - commandline. - -tf,--table-filter Filter tables (inclusive) with name matching - RegEx. Comparison done with 'show tables' - results. Check case, that's important. Hive - tables are generally stored in LOWERCASE. 
- Make sure you double-quote the expression on the commandline.
- -to,--transfer-ownership If available (supported) on LEFT cluster, extract and transfer the tables owner to the RIGHT cluster. Note: This will make an 'exta' SQL call on the LEFT cluster to determine the ownership. This won't be supported on CDH 5 and some other legacy Hive platforms. Beware the cost of this extra call for EVERY table, as it may slow down the process for a large volume of tables.
- -v,--views-only Process VIEWs ONLY
- -wd,--warehouse-directory The warehouse directory path. Should not include the namespace OR the database directory. This will be used to set the MANAGEDLOCATION database option.
-
+ -accept,--accept Accept ALL confirmations and silence prompts
+ -ap,--acid-partition-count Set the limit of partitions that the ACID strategy will work with. '-1' means no-limit.
+ -asm,--avro-schema-migration Migrate AVRO Schema Files referenced in TBLPROPERTIES by 'avro.schema.url'. Without migration it is expected that the file will exist on the other cluster and match the 'url' defined in the schema DDL. If it's not present, schema creation will FAIL. Specifying this option REQUIRES the LEFT and RIGHT cluster to be LINKED. See docs: https://github.com/cloudera-labs/hms-mirror#linking-clusters-storage-layers
+ -at,--auto-tune Auto-tune Session Settings for SELECT's and DISTRIBUTION for Partition INSERT's.
+ -cfg,--config Config with details for the HMS-Mirror. Default: $HOME/.hms-mirror/cfg/default.yaml
+ -cs,--common-storage Common Storage used with Data Strategy HYBRID, SQL, EXPORT_IMPORT. This will change the way these methods are implemented by using the specified storage location as an 'common' storage point between two clusters. In this case, the cluster do NOT need to be 'linked'. Each cluster DOES need to have access to the location and authorization to interact with the location. This may mean additional configuration requirements for 'hdfs' to ensure this seamless access.
+ -cto,--compress-text-output Data movement (SQL/STORAGE_MIGRATION) of TEXT-based file formats will be compressed in the new table.
+ -d,--data-strategy Specify how the data will follow the schema. [DUMP, SCHEMA_ONLY, LINKED, SQL, EXPORT_IMPORT, HYBRID, CONVERT_LINKED, STORAGE_MIGRATION, COMMON]
+ -da,--downgrade-acid Downgrade ACID tables to EXTERNAL tables with purge.
+ -db,--database Comma separated list of Databases (upto 100).
+ -dbo,--database-only Migrate the Database definitions as they exist from LEFT to RIGHT
+ -dbp,--db-prefix Optional: A prefix to add to the RIGHT cluster DB Name. Usually used for testing.
+ -dbr,--db-rename Optional: Rename target db to ... This option is only valid when '1' database is listed in `-db`.
+ -dbRegEx,--database-regex RegEx of Database to include in process.
+ -dc,--distcp Build the 'distcp' workplans. Optional argument (PULL, PUSH) to define which cluster is running the distcp commands. Default is PULL.
+ -dp,--decrypt-password Used this in conjunction with '-pkey' to decrypt the generated passcode from `-p`.
+ -ds,--dump-source Specify which 'cluster' is the source for the DUMP strategy (LEFT|RIGHT).
+ -e,--execute Execute actions request, without this flag the process is a dry-run.
+ -ep,--export-partition-count Set the limit of partitions that the EXPORT_IMPORT strategy will work with.
+ -ewd,--external-warehouse-directory The external warehouse directory + path. Should not include the + namespace OR the database directory. + This will be used to set the LOCATION + database option. + -f,--flip Flip the definitions for LEFT and + RIGHT. Allows the same config to be + used in reverse. + -fel,--force-external-location Under some conditions, the LOCATION + element for EXTERNAL tables is + removed (ie: -rdl). In which case we + rely on the settings of the database + definition to control the EXTERNAL + table data location. But for some + older Hive versions, the LOCATION + element in the database is NOT + honored. Even when the database + LOCATION is set, the EXTERNAL table + LOCATION defaults to the system wide + warehouse settings. This flag will + ensure the LOCATION element remains + in the CREATE definition of the table + to force it's location. + -glm,--global-location-map Comma separated key=value pairs of + Locations to Map. IE: + /myorig/data/finance=/data/ec/finance + . This reviews 'EXTERNAL' table + locations for the path + '/myorig/data/finance' and replaces + it with '/data/ec/finance'. Option + can be used alone or with -rdl. Only + applies to 'EXTERNAL' tables and if + the tables location doesn't contain + one of the supplied maps, it will be + translated according to -rdl rules if + -rdl is specified. If -rdl is not + specified, the conversion for that + table is skipped. + -h,--help Help + -ip,--in-place Downgrade ACID tables to EXTERNAL + tables with purge. + -is,--intermediate-storage Intermediate Storage used with Data + Strategy HYBRID, SQL, EXPORT_IMPORT. + This will change the way these + methods are implemented by using the + specified storage location as an + intermediate transfer point between + two clusters. In this case, the + cluster do NOT need to be 'linked'. + Each cluster DOES need to have access + to the location and authorization to + interact with the location. This may + mean additional configuration + requirements for 'hdfs' to ensure + this seamless access. + -ma,--migrate-acid Migrate ACID tables (if strategy + allows). Optional: + ArtificialBucketThreshold count that + will remove the bucket definition if + it's below this. Use this as a way + to remove artificial bucket + definitions that were added + 'artificially' in legacy Hive. + (default: 2) + -mao,--migrate-acid-only Migrate ACID tables ONLY (if strategy + allows). Optional: + ArtificialBucketThreshold count that + will remove the bucket definition if + it's below this. Use this as a way + to remove artificial bucket + definitions that were added + 'artificially' in legacy Hive. + (default: 2) + -mnn,--migrate-non-native Migrate Non-Native tables (if + strategy allows). These include table + definitions that rely on external + connection to systems like: HBase, + Kafka, JDBC + -mnno,--migrate-non-native-only Migrate Non-Native tables (if + strategy allows). These include table + definitions that rely on external + connection to systems like: HBase, + Kafka, JDBC + -np,--no-purge For SCHEMA_ONLY, COMMON, and LINKED + data strategies set RIGHT table to + NOT purge on DROP + -o,--output-dir Output Directory (default: + $HOME/.hms-mirror/reports/ + -p,--password Used this in conjunction with '-pkey' + to generate the encrypted password + that you'll add to the configs for + the JDBC connections. + -pkey,--password-key The key used to encrypt / decrypt the + cluster jdbc passwords. If not + present, the passwords will be + processed as is (clear text) from the + config file. 
+ -po,--property-overrides Comma separated key=value pairs of + Hive properties you wish to + set/override. + -pol,--property-overrides-left Comma separated key=value pairs of + Hive properties you wish to + set/override for LEFT cluster. + -por,--property-overrides-right Comma separated key=value pairs of + Hive properties you wish to + set/override for RIGHT cluster. + -q,--quiet Reduce screen reporting output. Good + for background processes with output + redirects to a file + -rdl,--reset-to-default-location Strip 'LOCATION' from all target + cluster definitions. This will allow + the system defaults to take over and + define the location of the new + datasets. + -rid,--right-is-disconnected Don't attempt to connect to the + 'right' cluster and run in this mode + -ro,--read-only For SCHEMA_ONLY, COMMON, and LINKED + data strategies set RIGHT table to + NOT purge on DROP. Intended for use + with replication distcp strategies + and has restrictions about existing + DB's on RIGHT and PATH elements. To + simply NOT set the purge flag for + applicable tables, use -np. + -rr,--reset-right Use this for testing to remove the + database on the RIGHT using CASCADE. + -s,--sync For SCHEMA_ONLY, COMMON, and LINKED + data strategies. Drop and Recreate + Schema's when different. Best to use + with RO to ensure table/partition + drops don't delete data. When used + WITHOUT `-tf` it will compare all the + tables in a database and sync + (bi-directional). Meaning it will + DROP tables on the RIGHT that aren't + in the LEFT and ADD tables to the + RIGHT that are missing. When used + with `-ro`, table schemas can be + updated by dropping and recreating. + When used with `-tf`, only the tables + that match the filter (on both sides) + will be considered. + -sdpi,--sort-dynamic-partition-inserts Used to set + `hive.optimize.sort.dynamic.partition + ` in TEZ for optimal partition + inserts. When not specified, will + use prescriptive sorting by adding + 'DISTRIBUTE BY' to transfer SQL. + default: false + -sf,--skip-features Skip Features evaluation. + -slc,--skip-link-check Skip Link Check. Use when going + between or to Cloud Storage to avoid + having to configure hms-mirror with + storage credentials and libraries. + This does NOT preclude your Hive + Server 2 and compute environment from + such requirements. + -slt,--skip-legacy-translation Skip Schema Upgrades and Serde + Translations + -smn,--storage-migration-namespace Optional: Used with the 'data + strategy STORAGE_MIGRATION to specify + the target namespace. + -so,--skip-optimizations Skip any optimizations during data + movement, like dynamic sorting or + distribute by + -sp,--sql-partition-count Set the limit of partitions that the + SQL strategy will work with. '-1' + means no-limit. + -sql,--sql-output . This option is no + longer required to get SQL out in a + report. That is the default + behavior. + -su,--setup Setup a default configuration file + through a series of questions + -tef,--table-exclude-filter Filter tables (excludes) with name + matching RegEx. Comparison done with + 'show tables' results. Check case, + that's important. Hive tables are + generally stored in LOWERCASE. Make + sure you double-quote the expression + on the commandline. + -tf,--table-filter Filter tables (inclusive) with name + matching RegEx. Comparison done with + 'show tables' results. Check case, + that's important. Hive tables are + generally stored in LOWERCASE. Make + sure you double-quote the expression + on the commandline. 
+ -tfp,--table-filter-partition-count-limit Filter OUT partitioned tables that have more partitions than specified here. Non-partitioned tables aren't filtered.
+ -tfs,--table-filter-size-limit Filter tables OUT that are above the indicated size. Expressed in MB
+ -to,--transfer-ownership If available (supported) on LEFT cluster, extract and transfer the tables owner to the RIGHT cluster. Note: This will make an 'exta' SQL call on the LEFT cluster to determine the ownership. This won't be supported on CDH 5 and some other legacy Hive platforms. Beware the cost of this extra call for EVERY table, as it may slow down the process for a large volume of tables.
+ -v,--views-only Process VIEWs ONLY
+ -wd,--warehouse-directory The warehouse directory path. Should not include the namespace OR the database directory. This will be used to set the MANAGEDLOCATION database option.
```

### Running Against a LEGACY (Non-CDP) Kerberized HiveServer2

diff --git a/pom.xml b/pom.xml
index ddb96efb..bfac7017 100644
--- a/pom.xml
+++ b/pom.xml
@@ -22,7 +22,7 @@
     <groupId>com.cloudera.utils.hadoop</groupId>
     <artifactId>hms-mirror</artifactId>
-    <version>1.5.5.1-SNAPSHOT</version>
+    <version>1.5.6.0-SNAPSHOT</version>
     <name>hms-mirror</name>
     <url>https://github.com/cloudera-labs/hms_mirror</url>
diff --git a/src/main/java/com/cloudera/utils/hadoop/hms/Context.java b/src/main/java/com/cloudera/utils/hadoop/hms/Context.java
new file mode 100644
index 00000000..079cb4f0
--- /dev/null
+++ b/src/main/java/com/cloudera/utils/hadoop/hms/Context.java
@@ -0,0 +1,33 @@
+package com.cloudera.utils.hadoop.hms;
+
+import com.cloudera.utils.hadoop.hms.mirror.Config;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+public class Context {
+    private static final Context instance = new Context();
+    private List<String> supportFileSystems = new ArrayList<String>(Arrays.asList(
+            "hdfs","ofs","s3","s3a","s3n","wasb","adls","gf"
+    ));
+    private Config config = null;
+
+    private Context() {};
+
+    public static Context getInstance() {
+        return instance;
+    }
+
+    public Config getConfig() {
+        return config;
+    }
+
+    public void setConfig(Config config) {
+        this.config = config;
+    }
+
+    public List<String> getSupportFileSystems() {
+        return supportFileSystems;
+    }
+}
diff --git a/src/main/java/com/cloudera/utils/hadoop/hms/Mirror.java b/src/main/java/com/cloudera/utils/hadoop/hms/Mirror.java
index 2c51a427..611e0c1e 100644
--- a/src/main/java/com/cloudera/utils/hadoop/hms/Mirror.java
+++ b/src/main/java/com/cloudera/utils/hadoop/hms/Mirror.java
@@ -24,6 +24,7 @@
 import com.cloudera.utils.hadoop.hms.util.Protect;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.fasterxml.jackson.databind.SerializationFeature;
+import com.fasterxml.jackson.databind.exc.UnrecognizedPropertyException;
 import com.fasterxml.jackson.dataformat.yaml.YAMLFactory;
 import com.google.common.collect.Sets;
 import org.apache.commons.cli.*;
@@ -224,14 +225,18 @@ public long init(String[] args) {
             System.out.println("Using Config: " + configFile);
             String yamlCfgFile = FileUtils.readFileToString(cfgFile, StandardCharsets.UTF_8);
             config = mapper.readerFor(Config.class).readValue(yamlCfgFile);
+            Context.getInstance().setConfig(config);
+        } catch (UnrecognizedPropertyException upe) {
+            System.out.println("\n>>>>> READ THIS BEFORE CONTINUING. Minor configuration fix REQUIRED. <<<<<");
+            throw new RuntimeException("\nThere may have been a breaking change in the configuration since the previous " +
+                    "release.
Review the note below and remove the 'Unrecognized field' from the configuration and try " + + "again.\n\n", upe); } catch (Throwable t) { // Look for yaml update errors. if (t.toString().contains("MismatchedInputException")) { throw new RuntimeException("The format of the 'config' yaml file MAY HAVE CHANGED from the last release. Please make a copy and run " + "'-su|--setup' again to recreate in the new format", t); } else { -// config = new Config(); -// config.getErrors().set(CONFIGURATION_REMOVED_OR_INVALID.getCode(), t.getMessage()); LOG.error(t); throw new RuntimeException("A configuration element is no longer valid, progress. Please remove the element from the configuration yaml and try again.", t); } @@ -389,9 +394,23 @@ public long init(String[] args) { } } + // Skip Optimizations. + if (cmd.hasOption("so")) { + config.getOptimization().setSkip(Boolean.TRUE); + } + // Sort Dynamic Partitions if (cmd.hasOption("sdpi")) { config.getOptimization().setSortDynamicPartitionInserts(Boolean.TRUE); } + // AutoTune. + if (cmd.hasOption("at")) { + config.getOptimization().setAutoTune(Boolean.TRUE); + } + + //Compress TEXT Output. + if (cmd.hasOption("cto")) { + config.getOptimization().setCompressTextOutput(Boolean.TRUE); + } if (cmd.hasOption("po")) { // property overrides. @@ -414,11 +433,6 @@ public long init(String[] args) { config.getOptimization().getOverrides().setPropertyOverridesStr(overrides, Overrides.Side.RIGHT); } - // Skip Optimizations. - if (cmd.hasOption("so")) { - config.getOptimization().setSkip(Boolean.TRUE); - } - if (cmd.hasOption("mnn")) { config.setMigratedNonNative(Boolean.TRUE); } @@ -606,15 +620,23 @@ public long init(String[] args) { } if (cmd.hasOption("dbRegEx")) { - config.setDbRegEx(cmd.getOptionValue("dbRegEx")); + config.getFilter().setDbRegEx(cmd.getOptionValue("dbRegEx")); } if (cmd.hasOption("tf")) { - config.setTblRegEx(cmd.getOptionValue("tf")); + config.getFilter().setTblRegEx(cmd.getOptionValue("tf")); } if (cmd.hasOption("tef")) { - config.setTblExcludeRegEx(cmd.getOptionValue("tef")); + config.getFilter().setTblExcludeRegEx(cmd.getOptionValue("tef")); + } + + if (cmd.hasOption("tfs")) { + config.getFilter().setTblSizeLimit(Long.parseLong(cmd.getOptionValue("tfs"))); + } + + if (cmd.hasOption("tfp")) { + config.getFilter().setTblPartitionLimit(Integer.parseInt(cmd.getOptionValue("tfp"))); } } @@ -665,10 +687,6 @@ public long init(String[] args) { config.setDatabases(databases); } - if (config.getDatabases() == null || config.getDatabases().length == 0) { - throw new RuntimeException("No databases specified"); - } - if (cmd.hasOption("e") && config.getDataStrategy() != DataStrategy.DUMP) { if (cmd.hasOption("accept")) { config.getAcceptance().setSilentOverride(Boolean.TRUE); @@ -1102,6 +1120,7 @@ public void doit() { } reportFile.write(mdReportStr); + reportFile.flush(); reportFile.close(); // Convert to HTML List extensions = Arrays.asList(TablesExtension.create(), YamlFrontMatterExtension.create()); @@ -1352,10 +1371,33 @@ private Options getOptions() { propertyRightOverrides.setArgs(100); options.addOption(propertyRightOverrides); + OptionGroup optimizationsGroup = new OptionGroup(); + optimizationsGroup.setRequired(Boolean.FALSE); + Option skipOptimizationsOption = new Option("so", "skip-optimizations", false, "Skip any optimizations during data movement, like dynamic sorting or distribute by"); skipOptimizationsOption.setRequired(Boolean.FALSE); - options.addOption(skipOptimizationsOption); + optimizationsGroup.addOption(skipOptimizationsOption); 
+
+        Option sdpiOption = new Option("sdpi", "sort-dynamic-partition-inserts", false,
+                "Used to set `hive.optimize.sort.dynamic.partition` in TEZ for optimal partition inserts. " +
+                        "When not specified, will use prescriptive sorting by adding 'DISTRIBUTE BY' to transfer SQL. " +
+                        "default: false");
+        sdpiOption.setRequired(Boolean.FALSE);
+        optimizationsGroup.addOption(sdpiOption);
+
+        Option autoTuneOption = new Option("at", "auto-tune", false,
+                "Auto-tune Session Settings for SELECT's and DISTRIBUTION for Partition INSERT's.");
+        autoTuneOption.setRequired(Boolean.FALSE);
+        optimizationsGroup.addOption(autoTuneOption);
+
+        options.addOptionGroup(optimizationsGroup);
+
+        Option compressTextOutputOption = new Option("cto", "compress-text-output", false,
+                "Data movement (SQL/STORAGE_MIGRATION) of TEXT-based file formats will be compressed in the new " +
+                        "table.");
+        compressTextOutputOption.setRequired(Boolean.FALSE);
+        options.addOption(compressTextOutputOption);
 
         Option forceExternalLocationOption = new Option("fel", "force-external-location", false,
                 "Under some conditions, the LOCATION element for EXTERNAL tables is removed (ie: -rdl). " +
@@ -1452,13 +1494,6 @@
         mnnoOption.setRequired(Boolean.FALSE);
         migrationOptionsGroup.addOption(mnnoOption);
 
-        Option sdpiOption = new Option("sdpi", "sort-dynamic-partition-inserts", false,
-                "Used to set `hive.optimize.sort.dynamic.partition` in TEZ for optimal partition inserts. " +
-                        "When not specified, will use prescriptive sorting by adding 'DISTRIBUTE BY' to transfer SQL. " +
-                        "default: false");
-        sdpiOption.setRequired(Boolean.FALSE);
-        options.addOption(sdpiOption);
-
         Option viewOption = new Option("v", "views-only", false,
                 "Process VIEWs ONLY");
         viewOption.setRequired(false);
@@ -1617,6 +1652,11 @@
         dbOption.setArgName("databases");
         dbOption.setArgs(100);
 
+        Option dbRegExOption = new Option("dbRegEx", "database-regex", true,
+                "RegEx of Database to include in process.");
+        dbRegExOption.setRequired(Boolean.FALSE);
+        dbRegExOption.setArgName("regex");
+
         Option helpOption = new Option("h", "help", false,
                 "Help");
         helpOption.setRequired(Boolean.FALSE);
@@ -1643,6 +1683,7 @@
         OptionGroup dbGroup = new OptionGroup();
         dbGroup.addOption(dbOption);
+        dbGroup.addOption(dbRegExOption);
         dbGroup.addOption(helpOption);
         dbGroup.addOption(setupOption);
         dbGroup.addOption(pwOption);
@@ -1694,6 +1735,19 @@
         options.addOptionGroup(filterGroup);
 
+        Option tableSizeFilterOption = new Option("tfs", "table-filter-size-limit", true,
+                "Filter tables OUT that are above the indicated size. Expressed in MB");
+        tableSizeFilterOption.setRequired(Boolean.FALSE);
+        tableSizeFilterOption.setArgName("size MB");
+        options.addOption(tableSizeFilterOption);
+
+        Option tablePartitionCountFilterOption = new Option("tfp", "table-filter-partition-count-limit", true,
+                "Filter OUT partitioned tables that have more partitions than specified here. Non-partitioned " +
+                        "tables aren't filtered.");
+        tablePartitionCountFilterOption.setRequired(Boolean.FALSE);
+        tablePartitionCountFilterOption.setArgName("partition-count");
+        options.addOption(tablePartitionCountFilterOption);
+
         Option cfgOption = new Option("cfg", "config", true,
                 "Config with details for the HMS-Mirror.
Default: $HOME/.hms-mirror/cfg/default.yaml"); cfgOption.setRequired(false); @@ -1703,19 +1757,19 @@ private Options getOptions() { return options; } - protected Boolean setupSql(Environment environment, List sqlPairList) { + public Boolean setupSql(Environment environment, List sqlPairList) { Boolean rtn = Boolean.TRUE; rtn = config.getCluster(environment).runClusterSql(sqlPairList); return rtn; } - protected long setupSqlLeft(String[] args, List sqlPairList) { + public long setupSqlLeft(String[] args, List sqlPairList) { long rtn = 0l; rtn = setupSql(args, sqlPairList, null); return rtn; } - protected long setupSqlRight(String[] args, List sqlPairList) { + public long setupSqlRight(String[] args, List sqlPairList) { long rtn = 0l; rtn = setupSql(args, null, sqlPairList); return rtn; @@ -1806,7 +1860,6 @@ public long go(String[] args) { } else { returnCode = -1; } - System.err.println(e.getMessage()); e.printStackTrace(); System.err.println("\nSee log for stack trace ($HOME/.hms-mirror/logs)"); } finally { diff --git a/src/main/java/com/cloudera/utils/hadoop/hms/mirror/Cluster.java b/src/main/java/com/cloudera/utils/hadoop/hms/mirror/Cluster.java index 1c0ba214..eb89a5ff 100644 --- a/src/main/java/com/cloudera/utils/hadoop/hms/mirror/Cluster.java +++ b/src/main/java/com/cloudera/utils/hadoop/hms/mirror/Cluster.java @@ -16,18 +16,23 @@ package com.cloudera.utils.hadoop.hms.mirror; +import com.cloudera.utils.hadoop.HadoopSession; +import com.cloudera.utils.hadoop.hms.Context; +import com.cloudera.utils.hadoop.shell.command.CommandReturn; import com.fasterxml.jackson.annotation.JsonIgnore; import com.cloudera.utils.hadoop.hms.util.TableUtils; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import java.sql.*; -import java.text.DateFormat; -import java.text.MessageFormat; -import java.text.SimpleDateFormat; +import java.text.*; import java.util.*; import java.util.regex.Matcher; +import static com.cloudera.utils.hadoop.hms.mirror.DataStrategy.DUMP; +import static com.cloudera.utils.hadoop.hms.mirror.MirrorConf.*; +import static com.cloudera.utils.hadoop.hms.mirror.TablePropertyVars.HMS_STORAGE_MIGRATION_FLAG; + public class Cluster implements Comparable { private static final Logger LOG = LogManager.getLogger(Cluster.class); private final DateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); @@ -243,7 +248,7 @@ public void getTables(Config config, DBMirror dbMirror) throws SQLException { if (!this.getLegacyHive()) { if (config.getMigrateVIEW().isOn()) { shows.add(MirrorConf.SHOW_VIEWS); - if (config.getDataStrategy() == DataStrategy.DUMP) { + if (config.getDataStrategy() == DUMP) { shows.add(MirrorConf.SHOW_TABLES); } } else { @@ -265,20 +270,20 @@ public void getTables(Config config, DBMirror dbMirror) throws SQLException { "The name is the result of a previous STORAGE_MIGRATION attempt that has not been " + "cleaned up."); } else { - if (config.getTblRegEx() == null && config.getTblExcludeRegEx() == null) { + if (config.getFilter().getTblRegEx() == null && config.getFilter().getTblExcludeRegEx() == null) { TableMirror tableMirror = dbMirror.addTable(tableName); tableMirror.setMigrationStageMessage("Added to evaluation inventory"); - } else if (config.getTblRegEx() != null) { + } else if (config.getFilter().getTblRegEx() != null) { // Filter Tables - assert (config.getTblFilterPattern() != null); - Matcher matcher = config.getTblFilterPattern().matcher(tableName); + assert (config.getFilter().getTblFilterPattern() != null); + Matcher matcher = 
config.getFilter().getTblFilterPattern().matcher(tableName); if (matcher.matches()) { TableMirror tableMirror = dbMirror.addTable(tableName); tableMirror.setMigrationStageMessage("Added to evaluation inventory"); } - } else if (config.getTblExcludeRegEx() != null) { - assert (config.getTblExcludeFilterPattern() != null); - Matcher matcher = config.getTblExcludeFilterPattern().matcher(tableName); + } else if (config.getFilter().getTblExcludeRegEx() != null) { + assert (config.getFilter().getTblExcludeFilterPattern() != null); + Matcher matcher = config.getFilter().getTblExcludeFilterPattern().matcher(tableName); if (!matcher.matches()) { // ANTI-MATCH TableMirror tableMirror = dbMirror.addTable(tableName); tableMirror.setMigrationStageMessage("Added to evaluation inventory"); @@ -322,8 +327,9 @@ public void getTables(Config config, DBMirror dbMirror) throws SQLException { } - public void getTableDefinition(Config config, String database, TableMirror tableMirror) throws SQLException { + public void getTableDefinition(String database, TableMirror tableMirror) throws SQLException { // The connection should already be in the database; + Config config = Context.getInstance().getConfig(); Connection conn = null; try { conn = getConnection(); @@ -363,7 +369,7 @@ public void getTableDefinition(Config config, String database, TableMirror table tableMirror.addStep(getEnvironment().toString(), "Fetched Schema"); if (this.environment == Environment.LEFT) { - if (config.getMigrateVIEW().isOn() && config.getDataStrategy() != DataStrategy.DUMP) { + if (config.getMigrateVIEW().isOn() && config.getDataStrategy() != DUMP) { if (!TableUtils.isView(et)) { tableMirror.setRemove(Boolean.TRUE); tableMirror.setRemoveReason("VIEW's only processing selected."); @@ -386,7 +392,7 @@ public void getTableDefinition(Config config, String database, TableMirror table tableMirror.setRemoveReason("Non-ACID table and ACID only processing selected `-mao`"); } } else if (TableUtils.isView(et)) { - if (config.getDataStrategy() != DataStrategy.DUMP) { + if (config.getDataStrategy() != DUMP) { tableMirror.setRemove(Boolean.TRUE); tableMirror.setRemoveReason("This is a VIEW and VIEW processing wasn't selected."); } @@ -399,9 +405,58 @@ public void getTableDefinition(Config config, String database, TableMirror table } } } - Boolean partitioned = TableUtils.isPartitioned(et.getName(), et.getDefinition()); + Boolean partitioned = TableUtils.isPartitioned(et); if (partitioned) { loadTablePartitionMetadata(conn, database, et); + // Check for table partition count filter + if (config.getFilter().getTblPartitionLimit() != null) { + Integer partLimit = config.getFilter().getTblPartitionLimit(); + if (et.getPartitions().size() > partLimit) { + tableMirror.setRemove(Boolean.TRUE); + tableMirror.setRemoveReason("The table partition count exceeds the specified table filter partition limit: " + + config.getFilter().getTblPartitionLimit() + " < " + et.getPartitions().size()); + + } + } + } + // Check for table size filter + if (config.getFilter().getTblSizeLimit() != null) { + Long dataSize = (Long)et.getStatistics().get(DATA_SIZE); + if (dataSize != null) { + if (config.getFilter().getTblSizeLimit() * (1024*1024) < dataSize) { + tableMirror.setRemove(Boolean.TRUE); + tableMirror.setRemoveReason("The table dataset size exceeds the specified table filter size limit: " + + config.getFilter().getTblSizeLimit() + "Mb < " + dataSize); + } + } + } + // Check for tables migration flag, to avoid 're-migration'. 
+ String smFlag = TableUtils.getTblProperty(HMS_STORAGE_MIGRATION_FLAG, et); + if (smFlag != null) { + tableMirror.setRemove(Boolean.TRUE); + tableMirror.setRemoveReason("The table has already gone through the STORAGE_MIGRATION process on " + + smFlag + " If this isn't correct, remove the TBLPROPERTY '" + HMS_STORAGE_MIGRATION_FLAG + "' " + + "from the table and try again."); + } + if (!tableMirror.isRemove()) { + switch (config.getDataStrategy()) { + case SCHEMA_ONLY: + case CONVERT_LINKED: + case DUMP: + case LINKED: + // These scenario don't require stats. + break; + case SQL: + case HYBRID: + case EXPORT_IMPORT: + case STORAGE_MIGRATION: + case COMMON: + case ACID: + if (!TableUtils.isView(et) && TableUtils.isHiveNative(et)) { + loadTableStats(et); + } + break; + } } } @@ -427,7 +482,7 @@ public void getTableDefinition(Config config, String database, TableMirror table if (owner != null) { et.setOwner(owner); } - } catch (SQLException sed) { + } catch (SQLException sed) { // Failed to gather owner details. } } @@ -654,51 +709,84 @@ public Boolean runDatabaseSql(DBMirror dbMirror, Pair dbSqlPair) { } protected void loadTablePartitionMetadata(Connection conn, String database, EnvironmentTable envTable) throws SQLException { -// Connection conn = null; - -// try { -// conn = getConnection(); -// if (conn != null) { - - Statement stmt = null; - ResultSet resultSet = null; + Statement stmt = null; + ResultSet resultSet = null; + try { + stmt = conn.createStatement(); + LOG.debug(getEnvironment() + ":" + database + "." + envTable.getName() + + ": Loading Partitions"); + + resultSet = stmt.executeQuery(MessageFormat.format(MirrorConf.SHOW_PARTITIONS, database, envTable.getName())); + List partDef = new ArrayList(); + while (resultSet.next()) { + partDef.add(resultSet.getString(1)); + } + envTable.setPartitions(partDef); + } finally { + if (resultSet != null) { try { - stmt = conn.createStatement(); - LOG.debug(getEnvironment() + ":" + database + "." + envTable.getName() + - ": Loading Partitions"); + resultSet.close(); + } catch (SQLException sqlException) { + // ignore + } + } + if (stmt != null) { + try { + stmt.close(); + } catch (SQLException sqlException) { + // ignore + } + } + } + } - resultSet = stmt.executeQuery(MessageFormat.format(MirrorConf.SHOW_PARTITIONS, database, envTable.getName())); - List partDef = new ArrayList(); - while (resultSet.next()) { - partDef.add(resultSet.getString(1)); - } - envTable.setPartitions(partDef); - } finally { - if (resultSet != null) { - try { - resultSet.close(); - } catch (SQLException sqlException) { - // ignore - } - } - if (stmt != null) { - try { - stmt.close(); - } catch (SQLException sqlException) { - // ignore - } + protected void loadTableStats(EnvironmentTable envTable) throws SQLException { + // Determine File sizes in table or partitions. + /* + - Get Base location for table + - Get HadoopSession + - Do a 'count' of the location. + */ + String location = TableUtils.getLocation(envTable.getName(), envTable.getDefinition()); + // Only run checks against hdfs and ozone namespaces. + String[] locationParts = location.split(":"); + String protocol = locationParts[0]; + if (Context.getInstance().getSupportFileSystems().contains(protocol)) { + HadoopSession cli = null; + try { + cli = config.getCliPool().borrow(); + String countCmd = "count " + location; + CommandReturn cr = cli.processInput(countCmd); + if (!cr.isError() && cr.getRecords().size() == 1) { + // We should only get back one record. 
+ List countRecord = cr.getRecords().get(0); + // 0 = Folder Count + // 1 = File Count + // 2 = Size Summary + try { + Double avgFileSize = (double) (Long.valueOf(countRecord.get(2).toString()) / + Integer.valueOf(countRecord.get(1).toString())); + envTable.getStatistics().put(DIR_COUNT, Integer.valueOf(countRecord.get(0).toString())); + envTable.getStatistics().put(FILE_COUNT, Integer.valueOf(countRecord.get(1).toString())); + envTable.getStatistics().put(DATA_SIZE, Long.valueOf(countRecord.get(2).toString())); + envTable.getStatistics().put(AVG_FILE_SIZE, avgFileSize); + envTable.getStatistics().put(TABLE_EMPTY, Boolean.FALSE); + } catch (ArithmeticException ae) { + // Directory is probably empty. + envTable.getStatistics().put(TABLE_EMPTY, Boolean.TRUE); } - } -// } -// } finally { -// try { -// if (conn != null) -// conn.close(); -// } catch (SQLException throwables) { -// // -// } -// } + } else { + // Issue getting count. + } + } finally { + if (cli != null) { + config.getCliPool().returnSession(cli); + } + } + } + // Determine Table File Format + TableUtils.getSerdeType(envTable); } @Override diff --git a/src/main/java/com/cloudera/utils/hadoop/hms/mirror/Config.java b/src/main/java/com/cloudera/utils/hadoop/hms/mirror/Config.java index 6593127a..b32789c9 100644 --- a/src/main/java/com/cloudera/utils/hadoop/hms/mirror/Config.java +++ b/src/main/java/com/cloudera/utils/hadoop/hms/mirror/Config.java @@ -52,6 +52,8 @@ public class Config { private static final Logger LOG = LogManager.getLogger(Config.class); + @JsonIgnore + private Date initDate = new Date(); private Acceptance acceptance = new Acceptance(); @JsonIgnore private HadoopSessionPool cliPool; @@ -60,14 +62,13 @@ public class Config { private boolean copyAvroSchemaUrls = Boolean.FALSE; private DataStrategy dataStrategy = DataStrategy.SCHEMA_ONLY; private Boolean databaseOnly = Boolean.FALSE; + private Filter filter = new Filter(); private Boolean skipLinkCheck = Boolean.FALSE; private String[] databases = null; private LegacyTranslations legacyTranslations = new LegacyTranslations(); @JsonIgnore private final String runMarker = new SimpleDateFormat("yyyyMMdd_HHmmss").format(new Date()); - @JsonIgnore - private Pattern dbFilterPattern = null; /* Prefix the DB with this to create an alternate db. Good for testing. @@ -76,8 +77,6 @@ public class Config { */ private String dbPrefix = null; private String dbRename = null; - @JsonIgnore // wip - private String dbRegEx = null; private Environment dumpSource = null; @JsonIgnore private final Messages errors = new Messages(100); @@ -136,14 +135,6 @@ public class Config { Transactional tables are NOT considered in this process. 
*/ private boolean sync = Boolean.FALSE; - @JsonIgnore - private Pattern tblFilterPattern = null; - @JsonIgnore - private Pattern tblExcludeFilterPattern = null; - private String tblRegEx = null; - - private String tblExcludeRegEx = null; - private TransferConfig transfer = new TransferConfig(); private Boolean transferOwnership = Boolean.FALSE; @JsonIgnore @@ -278,6 +269,14 @@ public static void setup(String configFile) { } } + public Date getInitDate() { + return initDate; + } + + public void setInitDate(Date initDate) { + this.initDate = initDate; + } + @JsonIgnore public Boolean isTranslateLegacy() { Boolean rtn = Boolean.FALSE; @@ -321,6 +320,14 @@ public void setFlip(Boolean flip) { } } + public Filter getFilter() { + return filter; + } + + public void setFilter(Filter filter) { + this.filter = filter; + } + public Boolean isReplace() { return replace; } @@ -575,47 +582,6 @@ public ScheduledExecutorService getTransferThreadPool() { return transferThreadPool; } - public String getDbRegEx() { - return dbRegEx; - } - - public void setDbRegEx(String dbRegEx) { - this.dbRegEx = dbRegEx; - if (this.dbRegEx != null) - dbFilterPattern = Pattern.compile(dbRegEx); - else - dbFilterPattern = null; - - } - - public Pattern getDbFilterPattern() { - return dbFilterPattern; - } - - public String getTblRegEx() { - return tblRegEx; - } - - public void setTblRegEx(String tblRegEx) { - this.tblRegEx = tblRegEx; - if (this.tblRegEx != null) - tblFilterPattern = Pattern.compile(tblRegEx); - else - tblFilterPattern = null; - } - - public String getTblExcludeRegEx() { - return tblExcludeRegEx; - } - - public void setTblExcludeRegEx(String tblExcludeRegEx) { - this.tblExcludeRegEx = tblExcludeRegEx; - if (this.tblExcludeRegEx != null) - tblExcludeFilterPattern = Pattern.compile(tblExcludeRegEx); - else - tblExcludeFilterPattern = null; - - } public Boolean getSkipFeatures() { return skipFeatures; @@ -625,13 +591,6 @@ public void setSkipFeatures(Boolean skipFeatures) { this.skipFeatures = skipFeatures; } - public Pattern getTblFilterPattern() { - return tblFilterPattern; - } - - public Pattern getTblExcludeFilterPattern() { - return tblExcludeFilterPattern; - } public Boolean getResetRight() { return resetRight; @@ -860,7 +819,7 @@ && getMigrateACID().isDowngrade() warnings.set(RESET_TO_DEFAULT_LOCATION_WITHOUT_WAREHOUSE_DIRS.getCode()); } - if (sync && tblRegEx != null) { + if (sync && getFilter().getTblRegEx() != null) { warnings.set(SYNC_TBL_FILTER.getCode()); } if (sync && !(dataStrategy == DataStrategy.SCHEMA_ONLY || dataStrategy == DataStrategy.LINKED || diff --git a/src/main/java/com/cloudera/utils/hadoop/hms/mirror/Conversion.java b/src/main/java/com/cloudera/utils/hadoop/hms/mirror/Conversion.java index 28c0c0fd..7c3b933f 100644 --- a/src/main/java/com/cloudera/utils/hadoop/hms/mirror/Conversion.java +++ b/src/main/java/com/cloudera/utils/hadoop/hms/mirror/Conversion.java @@ -27,6 +27,7 @@ import java.math.BigDecimal; import java.text.DateFormat; import java.text.DecimalFormat; +import java.text.DecimalFormatSymbols; import java.text.SimpleDateFormat; import java.util.*; @@ -178,6 +179,7 @@ public String toReport(Config config, String database) throws JsonProcessingExce Date current = new Date(); BigDecimal elsecs = new BigDecimal(current.getTime() - start.getTime()).divide(new BigDecimal(1000)); DecimalFormat eldecf = new DecimalFormat("#,###.00"); + DecimalFormat lngdecf = new DecimalFormat("#,###"); String elsecStr = eldecf.format(elsecs); sb.append("| ").append(df.format(new Date())).append(" | 
").append(elsecStr).append(" secs |\n\n"); @@ -257,7 +259,7 @@ public String toReport(Config config, String database) throws JsonProcessingExce sb.append("Source
ACID").append("\n"); sb.append("Phase
State").append("\n"); sb.append("Duration").append("\n"); - sb.append("Partition
Count").append("\n"); +// sb.append("Partition
Count").append("\n"); sb.append("Steps").append("\n"); if (dbMirror.hasActions()) { sb.append("Actions").append("\n"); @@ -265,6 +267,9 @@ public String toReport(Config config, String database) throws JsonProcessingExce if (dbMirror.hasAddedProperties()) { sb.append("Added
Properties").append("\n"); } + if (dbMirror.hasStatistics()) { + sb.append("Stats").append("\n"); + } if (dbMirror.hasIssues()) { sb.append("Issues").append("\n"); } @@ -302,8 +307,8 @@ public String toReport(Config config, String database) throws JsonProcessingExce sb.append("").append(secStr).append("").append("\n"); // Partition Count - sb.append("").append(let.getPartitioned() ? - let.getPartitions().size() : " ").append("").append("\n"); +// sb.append("").append(let.getPartitioned() ? +// let.getPartitions().size() : " ").append("").append("\n"); // Steps sb.append("\n"); @@ -381,6 +386,47 @@ public String toReport(Config config, String database) throws JsonProcessingExce sb.append(""); sb.append("").append("\n"); } + // Statistics + if (dbMirror.hasStatistics()) { + sb.append("").append("\n"); + sb.append(""); + for (Map.Entry entry : tblMirror.getEnvironments().entrySet()) { + if (entry.getValue().getStatistics().size() > 0) { + sb.append("\n"); + sb.append("\n"); + sb.append("").append("\n"); + + for (Map.Entry prop : entry.getValue().getStatistics().entrySet()) { + sb.append("\n"); + sb.append("\n"); + sb.append("\n"); + sb.append("\n"); + } + if (entry.getValue().getPartitioned()) { + sb.append("\n"); + sb.append("\n"); + sb.append("\n"); + sb.append("\n"); + } + } + } + sb.append("
"); + sb.append(entry.getKey()); + sb.append("
"); + sb.append(prop.getKey()); + sb.append(""); + if (prop.getValue() instanceof Double || prop.getValue() instanceof Long) { + sb.append(lngdecf.format(prop.getValue())); + } else { + sb.append(prop.getValue().toString()); + } + sb.append("
"); + sb.append(MirrorConf.PARTITION_COUNT); + sb.append(""); + sb.append(entry.getValue().getPartitions().size()); + sb.append("
"); + sb.append("").append("\n"); + } // Issues Reporting if (dbMirror.hasIssues()) { sb.append("").append("\n"); diff --git a/src/main/java/com/cloudera/utils/hadoop/hms/mirror/DBMirror.java b/src/main/java/com/cloudera/utils/hadoop/hms/mirror/DBMirror.java index 7b1e9715..7bea94fd 100644 --- a/src/main/java/com/cloudera/utils/hadoop/hms/mirror/DBMirror.java +++ b/src/main/java/com/cloudera/utils/hadoop/hms/mirror/DBMirror.java @@ -31,6 +31,8 @@ import static com.cloudera.utils.hadoop.hms.mirror.MessageCode.HDPHIVE3_DB_LOCATION; import static com.cloudera.utils.hadoop.hms.mirror.MessageCode.RO_DB_DOESNT_EXIST; +import static com.cloudera.utils.hadoop.hms.mirror.SessionVars.EXT_DB_LOCATION_PROP; +import static com.cloudera.utils.hadoop.hms.mirror.SessionVars.LEGACY_DB_LOCATION_PROP; public class DBMirror { private static final Logger LOG = LogManager.getLogger(DBMirror.class); @@ -267,9 +269,9 @@ public void buildDBStatements(Config config) { // SQL query to get default from Hive. String defaultDBLocProp = null; if (config.getCluster(Environment.RIGHT).getLegacyHive()) { - defaultDBLocProp = MirrorConf.LEGACY_DB_LOCATION_PROP; + defaultDBLocProp = LEGACY_DB_LOCATION_PROP; } else { - defaultDBLocProp = MirrorConf.EXT_DB_LOCATION_PROP; + defaultDBLocProp = EXT_DB_LOCATION_PROP; } Connection conn = null; @@ -496,4 +498,13 @@ public Boolean hasAddedProperties() { return rtn; } + public Boolean hasStatistics() { + Boolean rtn = Boolean.FALSE; + for (Map.Entry entry : tableMirrors.entrySet()) { + if (entry.getValue().hasStatistics()) + rtn = Boolean.TRUE; + } + return rtn; + } + } diff --git a/src/main/java/com/cloudera/utils/hadoop/hms/mirror/EnvironmentTable.java b/src/main/java/com/cloudera/utils/hadoop/hms/mirror/EnvironmentTable.java index 3e2f231e..3f6276d4 100644 --- a/src/main/java/com/cloudera/utils/hadoop/hms/mirror/EnvironmentTable.java +++ b/src/main/java/com/cloudera/utils/hadoop/hms/mirror/EnvironmentTable.java @@ -32,6 +32,9 @@ public class EnvironmentTable { private List partitions = new ArrayList(); private List actions = new ArrayList(); private Map addProperties = new TreeMap(); + + private Map statistics = new HashMap(); + private List issues = new ArrayList(); private final List sql = new ArrayList(); private final List cleanUpsql = new ArrayList(); @@ -81,7 +84,7 @@ public Boolean getPartitioned() { rtn = partitions.size() > 0 ? Boolean.TRUE : Boolean.FALSE; if (!rtn) { // Check the definition incase the partitions are empty. 
- rtn = TableUtils.isPartitioned(getName(), getDefinition()); + rtn = TableUtils.isPartitioned(this); } return rtn; } @@ -125,6 +128,14 @@ public void setAddProperties(Map addProperties) { this.addProperties = addProperties; } + public Map getStatistics() { + return statistics; + } + + public void setStatistics(Map statistics) { + this.statistics = statistics; + } + public List getIssues() { return issues; } diff --git a/src/main/java/com/cloudera/utils/hadoop/hms/mirror/Filter.java b/src/main/java/com/cloudera/utils/hadoop/hms/mirror/Filter.java new file mode 100644 index 00000000..49ad966b --- /dev/null +++ b/src/main/java/com/cloudera/utils/hadoop/hms/mirror/Filter.java @@ -0,0 +1,85 @@ +package com.cloudera.utils.hadoop.hms.mirror; + +import com.fasterxml.jackson.annotation.JsonIgnore; + +import java.util.regex.Pattern; + +public class Filter { + @JsonIgnore + private Pattern dbFilterPattern = null; + @JsonIgnore // wip + private String dbRegEx = null; + @JsonIgnore + private Pattern tblExcludeFilterPattern = null; + @JsonIgnore + private Pattern tblFilterPattern = null; + private String tblExcludeRegEx = null; + private String tblRegEx = null; + private Long tblSizeLimit = null; + private Integer tblPartitionLimit = null; + + public String getDbRegEx() { + return dbRegEx; + } + + public void setDbRegEx(String dbRegEx) { + this.dbRegEx = dbRegEx; + if (this.dbRegEx != null) + dbFilterPattern = Pattern.compile(dbRegEx); + else + dbFilterPattern = null; + + } + + public Pattern getDbFilterPattern() { + return dbFilterPattern; + } + + public String getTblRegEx() { + return tblRegEx; + } + + public void setTblRegEx(String tblRegEx) { + this.tblRegEx = tblRegEx; + if (this.tblRegEx != null) + tblFilterPattern = Pattern.compile(tblRegEx); + else + tblFilterPattern = null; + } + + public String getTblExcludeRegEx() { + return tblExcludeRegEx; + } + + public void setTblExcludeRegEx(String tblExcludeRegEx) { + this.tblExcludeRegEx = tblExcludeRegEx; + if (this.tblExcludeRegEx != null) + tblExcludeFilterPattern = Pattern.compile(tblExcludeRegEx); + else + tblExcludeFilterPattern = null; + + } + public Pattern getTblFilterPattern() { + return tblFilterPattern; + } + + public Pattern getTblExcludeFilterPattern() { + return tblExcludeFilterPattern; + } + + public Long getTblSizeLimit() { + return tblSizeLimit; + } + + public void setTblSizeLimit(Long tblSizeLimit) { + this.tblSizeLimit = tblSizeLimit; + } + + public Integer getTblPartitionLimit() { + return tblPartitionLimit; + } + + public void setTblPartitionLimit(Integer tblPartitionLimit) { + this.tblPartitionLimit = tblPartitionLimit; + } +} diff --git a/src/main/java/com/cloudera/utils/hadoop/hms/mirror/MirrorConf.java b/src/main/java/com/cloudera/utils/hadoop/hms/mirror/MirrorConf.java index 66924dd4..fc54c158 100644 --- a/src/main/java/com/cloudera/utils/hadoop/hms/mirror/MirrorConf.java +++ b/src/main/java/com/cloudera/utils/hadoop/hms/mirror/MirrorConf.java @@ -16,135 +16,83 @@ package com.cloudera.utils.hadoop.hms.mirror; -public class MirrorConf { - public static final String DESCRIBE_DB = "DESCRIBE DATABASE EXTENDED {0}"; - - public static final String SHOW_TABLES = "SHOW TABLES"; - public static final String SHOW_VIEWS = "SHOW VIEWS"; - public static final String SHOW_TABLE_EXTENDED = "SHOW TABLE EXTENDED LIKE {0}"; - public static final String SHOW_CREATE_TABLE = "SHOW CREATE TABLE {0}"; - public static final String DESCRIBE_FORMATTED_TABLE = "DESCRIBE FORMATTED {0}"; - public static final String SHOW_PARTITIONS = "SHOW PARTITIONS 
{0}.{1}"; - public static final String MSCK_REPAIR_TABLE = "MSCK REPAIR TABLE {0}"; - public static final String MSCK_REPAIR_TABLE_DESC = "MSCK Repair Table"; - public static final String CREATE_DB = +public interface MirrorConf { + String DESCRIBE_DB = "DESCRIBE DATABASE EXTENDED {0}"; + String SHOW_DATABASES = "SHOW DATABASES"; + String SHOW_TABLES = "SHOW TABLES"; + String SHOW_VIEWS = "SHOW VIEWS"; + String SHOW_TABLE_EXTENDED = "SHOW TABLE EXTENDED LIKE {0}"; + String SHOW_CREATE_TABLE = "SHOW CREATE TABLE {0}"; + String DESCRIBE_FORMATTED_TABLE = "DESCRIBE FORMATTED {0}"; + String SHOW_PARTITIONS = "SHOW PARTITIONS {0}.{1}"; + String MSCK_REPAIR_TABLE = "MSCK REPAIR TABLE {0}"; + String MSCK_REPAIR_TABLE_DESC = "MSCK Repair Table"; + String CREATE_DB = "CREATE DATABASE IF NOT EXISTS {0}"; - public static final String CREATE_DB_DESC = "Create Database"; - public static final String CREATE_TRANSFER_DB_DESC = "Create Transfer Database"; - public static final String DROP_DB = + String CREATE_DB_DESC = "Create Database"; + String CREATE_TRANSFER_DB_DESC = "Create Transfer Database"; + String DROP_DB = "DROP DATABASE IF EXISTS {0} CASCADE"; - public static final String DROP_DB_DESC = "Drop Database"; - public static final String ALTER_DB_LOCATION = + String DROP_DB_DESC = "Drop Database"; + String ALTER_DB_LOCATION = "ALTER DATABASE {0} SET LOCATION \"{1}\""; - public static final String ALTER_DB_LOCATION_DESC = + String ALTER_DB_LOCATION_DESC = "Alter Database Location"; - public static final String DEFAULT_MANAGED_BASE_DIR = "/warehouse/tablespace/managed/hive"; - public static final String ALTER_DB_MNGD_LOCATION = + String DEFAULT_MANAGED_BASE_DIR = "/warehouse/tablespace/managed/hive"; + String ALTER_DB_MNGD_LOCATION = "ALTER DATABASE {0} SET MANAGEDLOCATION \"{1}\""; - public static final String ALTER_DB_MNGD_LOCATION_DESC = + String ALTER_DB_MNGD_LOCATION_DESC = "Alter Database Managed Location"; - public static final String ANALYZE_TABLE_STATS = ""; - public static final String ANALYZE_COLUMN_STATS = ""; - public static final String CREATE_LIKE = + String CREATE_LIKE = "CREATE TABLE IF NOT EXISTS {0} LIKE {1}"; - public static final String CREATE_EXTERNAL_LIKE = + String CREATE_EXTERNAL_LIKE = "CREATE EXTERNAL TABLE IF NOT EXISTS {0} LIKE {1}"; - public static final String USE = "USE {0}"; - public static final String USE_DESC = "Set Database"; - public static final String DROP_TABLE = "DROP TABLE IF EXISTS {0}"; - public static final String DROP_TABLE_DESC = "Drop table"; - public static final String DROP_VIEW = "DROP VIEW IF EXISTS {0}"; - public static final String RENAME_TABLE_DESC = "Rename table"; - public static final String RENAME_TABLE = " ALTER TABLE {0} RENAME TO {1}"; - public static final String SET_OWNER_DESC = "Set table owner"; - public static final String SET_OWNER = "ALTER TABLE {0} SET OWNER USER {1}"; - public static final String EXPORT_TABLE = + String USE = "USE {0}"; + String USE_DESC = "Set Database"; + String DROP_TABLE = "DROP TABLE IF EXISTS {0}"; + String DROP_TABLE_DESC = "Drop table"; + String DROP_VIEW = "DROP VIEW IF EXISTS {0}"; + String RENAME_TABLE_DESC = "Rename table"; + String RENAME_TABLE = " ALTER TABLE {0} RENAME TO {1}"; + String SET_OWNER_DESC = "Set table owner"; + String SET_OWNER = "ALTER TABLE {0} SET OWNER USER {1}"; + String EXPORT_TABLE = "EXPORT TABLE {0} TO \"{1}\""; - public static final String IMPORT_EXTERNAL_TABLE = + String IMPORT_EXTERNAL_TABLE = "IMPORT EXTERNAL TABLE {0} FROM \"{1}\""; - public static final String 
IMPORT_TABLE = + String IMPORT_TABLE = "IMPORT TABLE {0} FROM \"{1}\""; - public static final String IMPORT_EXTERNAL_TABLE_LOCATION = + String IMPORT_EXTERNAL_TABLE_LOCATION = "IMPORT EXTERNAL TABLE {0} FROM \"{1}\" LOCATION \"{2}\""; - public static final String ADD_TABLE_PROP_DESC = + String ADD_TABLE_PROP_DESC = "Add/Update Table Property"; - public static final String ADD_TABLE_PROP = + String ADD_TABLE_PROP = "ALTER TABLE {0} SET TBLPROPERTIES (\"{1}\"=\"{2}\")"; - public static final String REMOVE_TABLE_PROP = + String REMOVE_TABLE_PROP = "ALTER TABLE {0} UNSET TBLPROPERTIES (\"{1}\")"; - public static final String REMOVE_TABLE_PROP_DESC = + String REMOVE_TABLE_PROP_DESC = "Remove table property"; - public static final String ALTER_TABLE_LOCATION = + String ALTER_TABLE_LOCATION = "ALTER TABLE {0} SET LOCATION \"{1}\""; - public static final String ALTER_TABLE_LOCATION_DESC = + String ALTER_TABLE_LOCATION_DESC = "Alter Table Location"; - public static final String ARCHIVE = "archive"; - public static final String HMS_MIRROR_LEGACY_MANAGED_FLAG = "hmsMirror_LegacyManaged"; - public static final String DOWNGRADED_FROM_ACID = "downgraded_from_acid"; - public static final String DISCOVER_PARTITIONS = "discover.partitions"; - public static final String TRANSLATED_TO_EXTERNAL = "TRANSLATED_TO_EXTERNAL"; - public static final String EXTERNAL_TABLE_PURGE = "external.table.purge"; - public static final String TRANSACTIONAL = "transactional"; - public static final String TRANSACTIONAL_PROPERTIES = "transactional_properties"; - public static final String HMS_MIRROR_TRANSFER_TABLE = "hms-mirror_transfer_table"; - public static final String HMS_MIRROR_SHADOW_TABLE = "hms-mirror_shadow_table"; - public static final String BUCKETING_VERSION = "bucketing_version"; - public static final String AVRO_SCHEMA_URL_KEY = "avro.schema.url"; - public static final String TEZ_EXECUTION_DESC = "Set 'tez' as the execution engine"; - public static final String SET_TEZ_AS_EXECUTION_ENGINE = "set hive.execution.engine=tez"; - public static final String SQL_DATA_TRANSFER = "FROM {0} INSERT INTO TABLE {1} SELECT *"; - public static final String SQL_DATA_TRANSFER_OVERWRITE = "FROM {0} INSERT OVERWRITE TABLE {1} SELECT *"; - public static final String SQL_DATA_TRANSFER_WITH_PARTITIONS_PRESCRIPTIVE = "FROM {0} INSERT OVERWRITE TABLE {1} PARTITION ({2}) SELECT * DISTRIBUTE BY {2}"; - public static final String SQL_DATA_TRANSFER_WITH_PARTITIONS_DECLARATIVE = "FROM {0} INSERT OVERWRITE TABLE {1} PARTITION ({2}) SELECT * "; - public static final String SORT_DYNAMIC_PARTITION = "hive.optimize.sort.dynamic.partition"; - public static final String SORT_DYNAMIC_PARTITION_THRESHOLD = "hive.optimize.sort.dynamic.partition.threshold"; - - /* - METADATA Transfer Flag - */ - public static final String HMS_MIRROR_METADATA_FLAG = "hmsMirror_Metadata_Stage1"; - public static final String HMS_MIRROR_CONVERTED_FLAG = "hmsMirror_Converted"; - - // Data Migration Flags - /* - Didn't move data (cloud storage scenario), but RIGHT cluster managed data flags - converted to upper cluster AND reset/unset in lower cluster. - */ - public static final String HMS_MIRROR_STORAGE_OWNER_FLAG = "hmsMirror_Storage_OWNER_Stage2"; - /* - Migrate Metadata only and use a temp table in the RIGHT cluster with a reference to the data - in the LEFT cluster and USE SQL to migrated the data from the temp table to a target table - in the RIGHT cluster that matches the LEFT cluster relative location. 
- */ - public static final String HMS_MIRROR_STORAGE_SQL_FLAG = "hmsMirror_Storage_SQL_Stage2"; - /* - Using Hive EXPORT to build a transferrable package of the schema and data in the lower cluster. - In the RIGHT cluster, with access to the LEFT cluster EXPORT location, IMPORT the table and data - into the RIGHT cluster. - Purge/Managed Adjustments: TBD - */ - public static final String HMS_MIRROR_STORAGE_IMPORT_FLAG = "hmsMirror_Storage_IMPORT_Stage2"; - /* - A mixed of SQL and IMPORT. Using table characteristics like partition count and data sizes to - determine whether to use SQL or EXPORT/IMPORT to move data. - Purge/Managed Adjustments: TBD - */ - public static final String HMS_MIRROR_STORAGE_HYBRID_FLAG = "hmsMirror_Storage_HYBRID_Stage2"; - /* - Build the schema in the upper cluster via the Metadata Mirror process. Then an 'external' process - uses 'distcp' to migrate the data in the background. - Requires EXTERNAL intervention. - */ - public static final String HMS_MIRROR_STORAGE_DISTCP_FLAG = "hmsMirror_Storage_DISTCP_Stage2"; - - public static final String LEGACY_DB_LOCATION_PROP = "hive.metastore.warehouse.dir"; - public static final String EXT_DB_LOCATION_PROP = "hive.metastore.warehouse.external.dir"; - public static final String MNGD_DB_LOCATION_PROP = "hive.metastore.warehouse.dir"; + String ARCHIVE = "archive"; + String SQL_DATA_TRANSFER = "FROM {0} INSERT INTO TABLE {1} SELECT *"; + String SQL_DATA_TRANSFER_OVERWRITE = "FROM {0} INSERT OVERWRITE TABLE {1} SELECT *"; + String SQL_DATA_TRANSFER_WITH_PARTITIONS_PRESCRIPTIVE = "FROM {0} INSERT OVERWRITE TABLE {1} PARTITION ({2}) SELECT * DISTRIBUTE BY {3}"; + String SQL_DATA_TRANSFER_WITH_PARTITIONS_DECLARATIVE = "FROM {0} INSERT OVERWRITE TABLE {1} PARTITION ({2}) SELECT * "; - public static final String DB_LOCATION = "LOCATION"; - public static final String DB_MANAGED_LOCATION = "MANAGEDLOCATION"; - public static final String COMMENT = "COMMENT"; + String DB_LOCATION = "LOCATION"; + String DB_MANAGED_LOCATION = "MANAGEDLOCATION"; + String COMMENT = "COMMENT"; + String FILE_FORMAT = "file.format"; + String FILE_COUNT = "file.count"; + String DIR_COUNT = "dir.count"; + String DATA_SIZE = "data.size"; + String AVG_FILE_SIZE = "avg.file.size"; + String TABLE_EMPTY = "table.empty"; + String PARTITION_COUNT = "partition.count"; } diff --git a/src/main/java/com/cloudera/utils/hadoop/hms/mirror/Optimization.java b/src/main/java/com/cloudera/utils/hadoop/hms/mirror/Optimization.java index eb1e95eb..c7ef0584 100644 --- a/src/main/java/com/cloudera/utils/hadoop/hms/mirror/Optimization.java +++ b/src/main/java/com/cloudera/utils/hadoop/hms/mirror/Optimization.java @@ -14,10 +14,11 @@ public class Optimization { - But do include additional settings specified by user in 'overrides'. 
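+ - 'autoTune' and 'compressTextOutput' below gate the stats-driven tuning applied in StatsCalculator and TableMirror.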
*/ private Boolean skip = Boolean.FALSE; + private Boolean autoTune = Boolean.FALSE; + private Boolean compressTextOutput = Boolean.FALSE; + private Overrides overrides = new Overrides(); private Boolean buildShadowStatistics = Boolean.FALSE; -// private Boolean smallFiles = Boolean.FALSE; -// private Integer tezGroupingMaxSizeMb = 128; public Boolean getSortDynamicPartitionInserts() { return sortDynamicPartitionInserts; @@ -35,6 +36,21 @@ public void setSkip(Boolean skip) { this.skip = skip; } + public Boolean getAutoTune() { + return autoTune; + } + + public void setAutoTune(Boolean autoTune) { + this.autoTune = autoTune; + } + + public Boolean getCompressTextOutput() { + return compressTextOutput; + } + + public void setCompressTextOutput(Boolean compressTextOutput) { + this.compressTextOutput = compressTextOutput; + } public Boolean getBuildShadowStatistics() { return buildShadowStatistics; @@ -52,19 +68,4 @@ public void setOverrides(Overrides overrides) { this.overrides = overrides; } -// public Boolean getSmallFiles() { -// return smallFiles; -// } -// -// public void setSmallFiles(Boolean smallFiles) { -// this.smallFiles = smallFiles; -// } -// -// public Integer getTezGroupingMaxSizeMb() { -// return tezGroupingMaxSizeMb; -// } -// -// public void setTezGroupingMaxSizeMb(Integer tezGroupingMaxSizeMb) { -// this.tezGroupingMaxSizeMb = tezGroupingMaxSizeMb; -// } } diff --git a/src/main/java/com/cloudera/utils/hadoop/hms/mirror/SerdeType.java b/src/main/java/com/cloudera/utils/hadoop/hms/mirror/SerdeType.java new file mode 100644 index 00000000..fa5f2c56 --- /dev/null +++ b/src/main/java/com/cloudera/utils/hadoop/hms/mirror/SerdeType.java @@ -0,0 +1,34 @@ +package com.cloudera.utils.hadoop.hms.mirror; + +import java.util.ArrayList; +import java.util.List; + +public enum SerdeType { + ORC(134217728, "org.apache.hadoop.hive.ql.io.orc.OrcSerde"), + PARQUET(134217728, "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe", + "parquet.hive.serde.ParquetHiveSerDe"), + TEXT(268435456, "org.apache.hadoop.hive.serde2.OpenCSVSerde", + "org.apache.hadoop.mapred.TextInputFormat", + "org.apache.hadoop.hive.serde2.avro.AvroSerDe", + "org.apache.hadoop.hive.serde2.JsonSerDe"), + BINARY(134217728,"org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe", + "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"), + UNKNOWN(268435456); + + Integer targetSize = null; + List serdeClasses = new ArrayList(); + + private SerdeType(Integer targetSize, String... 
serdeClasses) { + this.targetSize = targetSize; + for (String serdeClass : serdeClasses) { + this.serdeClasses.add(serdeClass); + } + } + + public Boolean isType(String serdeClass) { + Boolean rtn = Boolean.FALSE; + rtn = serdeClasses.contains(serdeClass); + return rtn; + } + +} diff --git a/src/main/java/com/cloudera/utils/hadoop/hms/mirror/SessionVars.java b/src/main/java/com/cloudera/utils/hadoop/hms/mirror/SessionVars.java new file mode 100644 index 00000000..d3816612 --- /dev/null +++ b/src/main/java/com/cloudera/utils/hadoop/hms/mirror/SessionVars.java @@ -0,0 +1,18 @@ +package com.cloudera.utils.hadoop.hms.mirror; + +public interface SessionVars { + String TEZ_EXECUTION_DESC = "Set 'tez' as the execution engine"; + String SET_TEZ_AS_EXECUTION_ENGINE = "set hive.execution.engine=tez"; + + String SORT_DYNAMIC_PARTITION = "hive.optimize.sort.dynamic.partition"; + String SORT_DYNAMIC_PARTITION_THRESHOLD = "hive.optimize.sort.dynamic.partition.threshold"; + String ANALYZE_TABLE_STATS = ""; + String ANALYZE_COLUMN_STATS = ""; + String LEGACY_DB_LOCATION_PROP = "hive.metastore.warehouse.dir"; + String EXT_DB_LOCATION_PROP = "hive.metastore.warehouse.external.dir"; + String MNGD_DB_LOCATION_PROP = "hive.metastore.warehouse.dir"; + String HIVE_COMPRESS_OUTPUT = "hive.exec.compress.output"; + String TEZ_GROUP_MAX_SIZE = "tez.grouping.max-size"; + String HIVE_MAX_DYNAMIC_PARTITIONS = "hive.exec.max.dynamic.partitions"; + String HIVE_MAX_REDUCERS = "hive.exec.reducers.max"; +} diff --git a/src/main/java/com/cloudera/utils/hadoop/hms/mirror/StatsCalculator.java b/src/main/java/com/cloudera/utils/hadoop/hms/mirror/StatsCalculator.java new file mode 100644 index 00000000..7d370e93 --- /dev/null +++ b/src/main/java/com/cloudera/utils/hadoop/hms/mirror/StatsCalculator.java @@ -0,0 +1,108 @@ +package com.cloudera.utils.hadoop.hms.mirror; + +import com.cloudera.utils.hadoop.hms.Context; +import com.cloudera.utils.hadoop.hms.util.TableUtils; + +import static com.cloudera.utils.hadoop.hms.mirror.MirrorConf.*; +import static com.cloudera.utils.hadoop.hms.mirror.SessionVars.*; + +/* +Provides rules generated from the basic stats hms-mirror collects. + */ +public class StatsCalculator { + + public static String getAdditionalPartitionDistribution(EnvironmentTable envTable) { + StringBuilder sb = new StringBuilder(); + + if (envTable.getPartitioned()) { + SerdeType stype = (SerdeType)envTable.getStatistics().get(FILE_FORMAT); + if (stype != null) { + if (envTable.getStatistics().get(DATA_SIZE) != null) { + Long dataSize = (Long)envTable.getStatistics().get(DATA_SIZE); + Long avgPartSize = Math.floorDiv(dataSize, (long)envTable.getPartitions().size()); + Long ratio = Math.floorDiv(avgPartSize, stype.targetSize) - 1; + if (ratio >= 1) { + sb.append("ROUND((rand() * 1000) % ").append(ratio.toString()).append(")"); + } + } + } + // Place the partition element AFTER the sub grouping to ensure we get it applied in the plan. + String partElement = TableUtils.getPartitionElements(envTable); + if (partElement != null) { + // Ensure we added an element before placing the comma.
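+ // Feeds the DISTRIBUTE BY clause, e.g. "ROUND((rand() * 1000) % 3), part_col1, part_col2" (column names hypothetical).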
+ if (sb.toString().length() > 0) { + sb.append(", "); + } + sb.append(partElement); + } + } + + return sb.toString(); + } + + public static String getTezMaxGrouping(EnvironmentTable envTable) { + StringBuilder sb = new StringBuilder(TableUtils.getPartitionElements(envTable)); + + if (envTable.getPartitioned()) { + SerdeType serdeType = (SerdeType) envTable.getStatistics().get(FILE_FORMAT); + if (serdeType != null) { + if (envTable.getStatistics().get(AVG_FILE_SIZE) != null) { + Double avgFileSize = (Double)envTable.getStatistics().get(AVG_FILE_SIZE); + // If not 90% of target size. + if (avgFileSize < serdeType.targetSize * .9) { + + } + } + } + } + + return sb.toString(); + } + + public static void setSessionOptions(EnvironmentTable controlEnv, EnvironmentTable applyEnv) { + // Small File settings. + SerdeType stype = (SerdeType) controlEnv.getStatistics().get(FILE_FORMAT); + if (stype != null) { + // TODO: Trying to figure out if making this setting will bleed over to other sessions while reusing a connection. + if (controlEnv.getStatistics().get(AVG_FILE_SIZE) != null) { + Double avgFileSize = (Double)controlEnv.getStatistics().get(AVG_FILE_SIZE); + // If not 50% of target size. + if (avgFileSize < stype.targetSize * .5) { + applyEnv.addIssue("Setting " + TEZ_GROUP_MAX_SIZE + " to account for the source's 'small files'"); + // Set the tez group max size. + applyEnv.addSql("Setting the " + TEZ_GROUP_MAX_SIZE, + "set " + TEZ_GROUP_MAX_SIZE + "=" + stype.targetSize); + } + } + } + + // Check the partition count. + if (controlEnv.getPartitioned()) { + // MAX DYN PARTS: 1000 is the Apache default. CDP is 5000. Regardless, we'll set this to +20% + // Also check MAX REDUCERS + if (controlEnv.getPartitions().size() > 1000) { + applyEnv.addIssue("Setting " + HIVE_MAX_DYNAMIC_PARTITIONS); + applyEnv.addSql("Setting " + HIVE_MAX_DYNAMIC_PARTITIONS, + "set " + HIVE_MAX_DYNAMIC_PARTITIONS + "=" + + Integer.toString((int) (controlEnv.getPartitions().size() * 1.2))); + applyEnv.addIssue("Adjusting " + HIVE_MAX_REDUCERS + " to handle partition load"); + applyEnv.addSql("Setting " + HIVE_MAX_REDUCERS, + "set " + HIVE_MAX_REDUCERS + "=" + + Integer.toString(controlEnv.getPartitions().size() * 2)); + } + } + + // Compression Settings.
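+ // Only TEXT-format sources are affected; columnar formats (ORC/Parquet) carry their own internal compression.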
+ if (controlEnv.getStatistics().get(FILE_FORMAT) != null + && controlEnv.getStatistics().get(FILE_FORMAT) == SerdeType.TEXT) { + if (Context.getInstance().getConfig().getOptimization().getCompressTextOutput()) { + applyEnv.addIssue("Setting " + HIVE_COMPRESS_OUTPUT + " because you've enabled that optimization"); + applyEnv.addSql("Setting: " + HIVE_COMPRESS_OUTPUT, "set " + HIVE_COMPRESS_OUTPUT + "=true"); + } else { + applyEnv.addIssue("Setting " + HIVE_COMPRESS_OUTPUT + " because you haven't enabled that optimization"); + applyEnv.addSql("Setting: " + HIVE_COMPRESS_OUTPUT, "set " + HIVE_COMPRESS_OUTPUT + "=false"); + } + } + } +} diff --git a/src/main/java/com/cloudera/utils/hadoop/hms/mirror/TableMirror.java b/src/main/java/com/cloudera/utils/hadoop/hms/mirror/TableMirror.java index 0550926b..e01d7eba 100644 --- a/src/main/java/com/cloudera/utils/hadoop/hms/mirror/TableMirror.java +++ b/src/main/java/com/cloudera/utils/hadoop/hms/mirror/TableMirror.java @@ -16,6 +16,7 @@ package com.cloudera.utils.hadoop.hms.mirror; +import com.cloudera.utils.hadoop.hms.Context; import com.cloudera.utils.hadoop.hms.mirror.feature.Feature; import com.cloudera.utils.hadoop.hms.mirror.feature.FeaturesEnum; import com.cloudera.utils.hadoop.hms.util.TableUtils; @@ -31,6 +32,10 @@ import java.text.SimpleDateFormat; import java.util.*; +import static com.cloudera.utils.hadoop.hms.mirror.SessionVars.SORT_DYNAMIC_PARTITION; +import static com.cloudera.utils.hadoop.hms.mirror.SessionVars.SORT_DYNAMIC_PARTITION_THRESHOLD; +import static com.cloudera.utils.hadoop.hms.mirror.TablePropertyVars.*; + public class TableMirror { private static final Logger LOG = LogManager.getLogger(TableMirror.class); @@ -42,6 +47,7 @@ public class TableMirror { /* Use to indicate the tblMirror should be removed from processing, post setup. */ + private static DateFormat df = new SimpleDateFormat("yyyyMMddHHmmss"); @JsonIgnore private boolean remove = Boolean.FALSE; @JsonIgnore @@ -83,7 +89,8 @@ public String getName(Environment environment) { } public String getUnique() { - return unique; + return df.format(Context.getInstance().getConfig().getInitDate()); +// return unique; } public String getDbName() { @@ -298,6 +305,15 @@ public Boolean hasAddedProperties() { return rtn; } + public Boolean hasStatistics() { + Boolean rtn = Boolean.FALSE; + for (Map.Entry entry : environments.entrySet()) { + if (entry.getValue().getStatistics().size() > 0) + rtn = Boolean.TRUE; + } + return rtn; + } + public void setName(String name) { this.name = name; } @@ -316,7 +332,7 @@ public Boolean buildoutDUMPDefinition(Config config, DBMirror dbMirror) { // If not legacy, remove location from ACID tables. if (!config.getCluster(Environment.LEFT).getLegacyHive() && - TableUtils.isACID(let.getName(), let.getDefinition())) { + TableUtils.isACID(let)) { TableUtils.stripLocation(let.getName(), let.getDefinition()); } return Boolean.TRUE; @@ -421,7 +437,7 @@ public Boolean buildoutSCHEMA_ONLYDefinition(Config config, DBMirror dbMirror) { // If not legacy, remove location from ACID tables.
if (rtn && !config.getCluster(Environment.LEFT).getLegacyHive() && - TableUtils.isACID(let.getName(), let.getDefinition())) { + TableUtils.isACID(let)) { TableUtils.stripLocation(let.getName(), let.getDefinition()); } return rtn; @@ -604,7 +620,7 @@ public Boolean buildoutSQLACIDDowngradeInplaceSQL(Config config, DBMirror dbMirr if (let.getPartitioned()) { if (config.getOptimization().getSkip()) { if (!config.getCluster(Environment.LEFT).getLegacyHive()) { - let.addSql("Setting " + MirrorConf.SORT_DYNAMIC_PARTITION, "set " + MirrorConf.SORT_DYNAMIC_PARTITION + "=false"); + let.addSql("Setting " + SORT_DYNAMIC_PARTITION, "set " + SORT_DYNAMIC_PARTITION + "=false"); } String partElement = TableUtils.getPartitionElements(let); String transferSql = MessageFormat.format(MirrorConf.SQL_DATA_TRANSFER_WITH_PARTITIONS_DECLARATIVE, @@ -612,16 +628,30 @@ public Boolean buildoutSQLACIDDowngradeInplaceSQL(Config config, DBMirror dbMirr String transferDesc = MessageFormat.format(TableUtils.STAGE_TRANSFER_PARTITION_DESC, let.getPartitions().size()); let.addSql(new Pair(transferDesc, transferSql)); } else if (config.getOptimization().getSortDynamicPartitionInserts()) { - // Prescriptive Optimization. + if (!config.getCluster(Environment.LEFT).getLegacyHive()) { + let.addSql("Setting " + SORT_DYNAMIC_PARTITION, "set " + SORT_DYNAMIC_PARTITION + "=true"); + let.addSql("Setting " + SORT_DYNAMIC_PARTITION_THRESHOLD, "set " + SORT_DYNAMIC_PARTITION_THRESHOLD + "=0"); + } String partElement = TableUtils.getPartitionElements(let); - String transferSql = MessageFormat.format(MirrorConf.SQL_DATA_TRANSFER_WITH_PARTITIONS_PRESCRIPTIVE, + String transferSql = MessageFormat.format(MirrorConf.SQL_DATA_TRANSFER_WITH_PARTITIONS_DECLARATIVE, let.getName(), ret.getName(), partElement); String transferDesc = MessageFormat.format(TableUtils.STORAGE_MIGRATION_TRANSFER_DESC, let.getPartitions().size()); let.addSql(new Pair(transferDesc, transferSql)); } else { + // Prescriptive Optimization. + if (!config.getCluster(Environment.LEFT).getLegacyHive()) { + let.addSql("Setting " + SORT_DYNAMIC_PARTITION, "set " + SORT_DYNAMIC_PARTITION + "=false"); + let.addSql("Setting " + SORT_DYNAMIC_PARTITION_THRESHOLD, "set " + SORT_DYNAMIC_PARTITION_THRESHOLD + "=-1"); + } String partElement = TableUtils.getPartitionElements(let); - String transferSql = MessageFormat.format(MirrorConf.SQL_DATA_TRANSFER_WITH_PARTITIONS_DECLARATIVE, - let.getName(), ret.getName(), partElement); + String distPartElement = null; + if (Context.getInstance().getConfig().getOptimization().getAutoTune()) { + distPartElement = StatsCalculator.getAdditionalPartitionDistribution(let); + } else { + distPartElement = TableUtils.getPartitionElements(let); + } + String transferSql = MessageFormat.format(MirrorConf.SQL_DATA_TRANSFER_WITH_PARTITIONS_PRESCRIPTIVE, + let.getName(), ret.getName(), partElement, distPartElement); String transferDesc = MessageFormat.format(TableUtils.STORAGE_MIGRATION_TRANSFER_DESC, let.getPartitions().size()); let.addSql(new Pair(transferDesc, transferSql)); } @@ -659,8 +689,8 @@ public Boolean buildoutSQLACIDDowngradeInplaceDefinition(Config config, DBMirror // Rename Original Table // Remove property (if exists) to prevent rename from happening.
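+ // (Unsetting TRANSLATED_TO_EXTERNAL keeps Hive from relocating the table's data directory during the rename.)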
- if (TableUtils.hasTblProperty(MirrorConf.TRANSLATED_TO_EXTERNAL, let)) { - String unSetSql = MessageFormat.format(MirrorConf.REMOVE_TABLE_PROP, origTableName, MirrorConf.TRANSLATED_TO_EXTERNAL); + if (TableUtils.hasTblProperty(TRANSLATED_TO_EXTERNAL, let)) { + String unSetSql = MessageFormat.format(MirrorConf.REMOVE_TABLE_PROP, origTableName, TRANSLATED_TO_EXTERNAL); let.addSql(MirrorConf.REMOVE_TABLE_PROP_DESC, unSetSql); } @@ -990,7 +1020,7 @@ private Boolean buildoutHYBRIDDefinition(Config config, DBMirror dbMirror) { let = getEnvironmentTable(Environment.LEFT); - if (TableUtils.isACID(let.getName(), let.getDefinition())) { + if (TableUtils.isACID(let)) { if (config.getMigrateACID().isOn()) { rtn = buildoutIntermediateDefinition(config, dbMirror); } else { @@ -1147,7 +1177,9 @@ public Boolean buildoutSTORAGEMIGRATIONDefinition(Config config, DBMirror dbMirr addIssue(Environment.LEFT, "Table has already been migrated"); return Boolean.FALSE; } -// ret = getEnvironmentTable(Environment.RIGHT); + // Add the STORAGE_MIGRATED flag to the table definition. + DateFormat df = new SimpleDateFormat(); + TableUtils.upsertTblProperty(HMS_STORAGE_MIGRATION_FLAG, df.format(new Date()), let); // Create a 'target' table definition on left cluster with right definition (used only as place holder) copySpec = new CopySpec(config, Environment.LEFT, Environment.RIGHT); @@ -1265,12 +1297,12 @@ public Boolean buildoutSTORAGEMIGRATIONSql(Config config, DBMirror dbMirror) { let.addSql(TableUtils.USE_DESC, useDb); // Alter the current table and rename. // Remove property (if exists) to prevent rename from happening. - if (TableUtils.hasTblProperty(MirrorConf.TRANSLATED_TO_EXTERNAL, let)) { - String unSetSql = MessageFormat.format(MirrorConf.REMOVE_TABLE_PROP, ret.getName(), MirrorConf.TRANSLATED_TO_EXTERNAL); + if (TableUtils.hasTblProperty(TRANSLATED_TO_EXTERNAL, let)) { + String unSetSql = MessageFormat.format(MirrorConf.REMOVE_TABLE_PROP, ret.getName(), TRANSLATED_TO_EXTERNAL); let.addSql(MirrorConf.REMOVE_TABLE_PROP_DESC, unSetSql); } // Set unique name for old target to rename. - let.setName(let.getName() + "_" + getUnique()+"storage_migration"); + let.setName(let.getName() + "_" + getUnique()+"_storage_migration"); String origAlterRename = MessageFormat.format(MirrorConf.RENAME_TABLE, ret.getName(), let.getName()); let.addSql(MirrorConf.RENAME_TABLE_DESC, origAlterRename); @@ -1428,8 +1460,8 @@ public Boolean buildoutEXPORT_IMPORTSql(Config config, DBMirror dbMirror) { if (isACIDDowngradeInPlace(config, let)) { // Rename original table. // Remove property (if exists) to prevent rename from happening. 
- if (TableUtils.hasTblProperty(MirrorConf.TRANSLATED_TO_EXTERNAL, let)) { - String unSetSql = MessageFormat.format(MirrorConf.REMOVE_TABLE_PROP, origTableName, MirrorConf.TRANSLATED_TO_EXTERNAL); + if (TableUtils.hasTblProperty(TRANSLATED_TO_EXTERNAL, let)) { + String unSetSql = MessageFormat.format(MirrorConf.REMOVE_TABLE_PROP, origTableName, TRANSLATED_TO_EXTERNAL); let.addSql(MirrorConf.REMOVE_TABLE_PROP_DESC, unSetSql); } String newTblName = let.getName() + "_archive"; @@ -1457,7 +1489,7 @@ public Boolean buildoutEXPORT_IMPORTSql(Config config, DBMirror dbMirror) { String sourceLocation = TableUtils.getLocation(let.getName(), let.getDefinition()); String targetLocation = config.getTranslator().translateTableLocation(this, sourceLocation, config); String importSql; - if (TableUtils.isACID(let.getName(), let.getDefinition())) { + if (TableUtils.isACID(let)) { if (!config.getMigrateACID().isDowngrade()) { importSql = MessageFormat.format(MirrorConf.IMPORT_TABLE, let.getName(), importLoc); } else { @@ -1553,7 +1585,7 @@ protected Boolean buildSourceToTransferSql(Config config) { if (source.getPartitioned()) { if (config.getOptimization().getSkip()) { if (!config.getCluster(Environment.LEFT).getLegacyHive()) { - source.addSql("Setting " + MirrorConf.SORT_DYNAMIC_PARTITION, "set " + MirrorConf.SORT_DYNAMIC_PARTITION + "=false"); + source.addSql("Setting " + SORT_DYNAMIC_PARTITION, "set " + SORT_DYNAMIC_PARTITION + "=false"); } String partElement = TableUtils.getPartitionElements(source); String transferSql = MessageFormat.format(MirrorConf.SQL_DATA_TRANSFER_WITH_PARTITIONS_DECLARATIVE, @@ -1562,8 +1594,8 @@ protected Boolean buildSourceToTransferSql(Config config) { source.addSql(new Pair(transferDesc, transferSql)); } else if (config.getOptimization().getSortDynamicPartitionInserts()) { if (!config.getCluster(Environment.LEFT).getLegacyHive()) { - source.addSql("Setting " + MirrorConf.SORT_DYNAMIC_PARTITION, "set " + MirrorConf.SORT_DYNAMIC_PARTITION + "=true"); - source.addSql("Setting " + MirrorConf.SORT_DYNAMIC_PARTITION_THRESHOLD, "set " + MirrorConf.SORT_DYNAMIC_PARTITION_THRESHOLD + "=0"); + source.addSql("Setting " + SORT_DYNAMIC_PARTITION, "set " + SORT_DYNAMIC_PARTITION + "=true"); + source.addSql("Setting " + SORT_DYNAMIC_PARTITION_THRESHOLD, "set " + SORT_DYNAMIC_PARTITION_THRESHOLD + "=0"); } String partElement = TableUtils.getPartitionElements(source); String transferSql = MessageFormat.format(MirrorConf.SQL_DATA_TRANSFER_WITH_PARTITIONS_DECLARATIVE, @@ -1571,9 +1603,13 @@ protected Boolean buildSourceToTransferSql(Config config) { String transferDesc = MessageFormat.format(TableUtils.STAGE_TRANSFER_PARTITION_DESC, source.getPartitions().size()); source.addSql(new Pair(transferDesc, transferSql)); } else { + if (!config.getCluster(Environment.LEFT).getLegacyHive()) { + source.addSql("Setting " + SORT_DYNAMIC_PARTITION, "set " + SORT_DYNAMIC_PARTITION + "=false"); + source.addSql("Setting " + SORT_DYNAMIC_PARTITION_THRESHOLD, "set " + SORT_DYNAMIC_PARTITION_THRESHOLD + "=-1"); + } String partElement = TableUtils.getPartitionElements(source); String transferSql = MessageFormat.format(MirrorConf.SQL_DATA_TRANSFER_WITH_PARTITIONS_PRESCRIPTIVE, - source.getName(), transfer.getName(), partElement); + source.getName(), transfer.getName(), partElement, partElement); String transferDesc = MessageFormat.format(TableUtils.STAGE_TRANSFER_PARTITION_DESC, source.getPartitions().size()); source.addSql(new Pair(transferDesc, transferSql)); } @@ -1624,7 +1660,7 @@ protected Boolean 
buildShadowToFinalSql(Config config) { if (source.getPartitioned()) { if (config.getOptimization().getSkip()) { if (!config.getCluster(Environment.RIGHT).getLegacyHive()) { - target.addSql("Setting " + MirrorConf.SORT_DYNAMIC_PARTITION, "set " + MirrorConf.SORT_DYNAMIC_PARTITION + "=false"); + target.addSql("Setting " + SORT_DYNAMIC_PARTITION, "set " + SORT_DYNAMIC_PARTITION + "=false"); } String partElement = TableUtils.getPartitionElements(source); String shadowSql = MessageFormat.format(MirrorConf.SQL_DATA_TRANSFER_WITH_PARTITIONS_DECLARATIVE, @@ -1633,8 +1669,8 @@ protected Boolean buildShadowToFinalSql(Config config) { target.addSql(new Pair(shadowDesc, shadowSql)); } else if (config.getOptimization().getSortDynamicPartitionInserts()) { if (!config.getCluster(Environment.RIGHT).getLegacyHive()) { - target.addSql("Setting " + MirrorConf.SORT_DYNAMIC_PARTITION, "set " + MirrorConf.SORT_DYNAMIC_PARTITION + "=true"); - target.addSql("Setting " + MirrorConf.SORT_DYNAMIC_PARTITION_THRESHOLD, "set " + MirrorConf.SORT_DYNAMIC_PARTITION_THRESHOLD + "=0"); + target.addSql("Setting " + SORT_DYNAMIC_PARTITION, "set " + SORT_DYNAMIC_PARTITION + "=true"); + target.addSql("Setting " + SORT_DYNAMIC_PARTITION_THRESHOLD, "set " + SORT_DYNAMIC_PARTITION_THRESHOLD + "=0"); } String partElement = TableUtils.getPartitionElements(source); String shadowSql = MessageFormat.format(MirrorConf.SQL_DATA_TRANSFER_WITH_PARTITIONS_DECLARATIVE, @@ -1642,9 +1678,20 @@ protected Boolean buildShadowToFinalSql(Config config) { String shadowDesc = MessageFormat.format(TableUtils.LOAD_FROM_PARTITIONED_SHADOW_DESC, source.getPartitions().size()); target.addSql(new Pair(shadowDesc, shadowSql)); } else { + // Prescriptive + if (!config.getCluster(Environment.LEFT).getLegacyHive()) { + source.addSql("Setting " + SORT_DYNAMIC_PARTITION, "set " + SORT_DYNAMIC_PARTITION + "=false"); + source.addSql("Setting " + SORT_DYNAMIC_PARTITION_THRESHOLD, "set " + SORT_DYNAMIC_PARTITION_THRESHOLD + "=-1"); + } String partElement = TableUtils.getPartitionElements(source); + String distPartElement = null; + if (Context.getInstance().getConfig().getOptimization().getAutoTune()) { + distPartElement = StatsCalculator.getAdditionalPartitionDistribution(source); + } else { + distPartElement = TableUtils.getPartitionElements(source); + } String shadowSql = MessageFormat.format(MirrorConf.SQL_DATA_TRANSFER_WITH_PARTITIONS_PRESCRIPTIVE, - shadow.getName(), target.getName(), partElement); + shadow.getName(), target.getName(), partElement, distPartElement); String shadowDesc = MessageFormat.format(TableUtils.STORAGE_MIGRATION_TRANSFER_DESC, target.getPartitions().size()); target.addSql(new Pair(shadowDesc, shadowSql)); } @@ -1734,11 +1781,11 @@ public Boolean buildTableSchema(CopySpec copySpec) { converted = TableUtils.makeExternal(target); if (converted) { target.addIssue("Schema 'converted' from LEGACY managed to EXTERNAL"); - target.addProperty(MirrorConf.HMS_MIRROR_LEGACY_MANAGED_FLAG, converted.toString()); - target.addProperty(MirrorConf.HMS_MIRROR_CONVERTED_FLAG, converted.toString()); + target.addProperty(HMS_MIRROR_LEGACY_MANAGED_FLAG, converted.toString()); + target.addProperty(HMS_MIRROR_CONVERTED_FLAG, converted.toString()); if (copySpec.getTakeOwnership()) { if (!config.isNoPurge()) { - target.addProperty(MirrorConf.EXTERNAL_TABLE_PURGE, "true"); + target.addProperty(EXTERNAL_TABLE_PURGE, "true"); } } else { target.addIssue("Ownership of the data not allowed in this scenario, PURGE flag NOT set."); @@ -1751,19 +1798,19 @@ public Boolean
buildTableSchema(CopySpec copySpec) { if (copySpec.getTakeOwnership()) { if (TableUtils.isACID(source)) { if (config.getMigrateACID().isDowngrade() && !config.isNoPurge()) { - target.addProperty(MirrorConf.EXTERNAL_TABLE_PURGE, "true"); + target.addProperty(EXTERNAL_TABLE_PURGE, "true"); } } else { - target.addProperty(MirrorConf.EXTERNAL_TABLE_PURGE, "true"); + target.addProperty(EXTERNAL_TABLE_PURGE, "true"); } } } } else { // Handle ACID tables. if (copySpec.isMakeNonTransactional()) { - TableUtils.removeTblProperty(MirrorConf.TRANSACTIONAL, target); - TableUtils.removeTblProperty(MirrorConf.TRANSACTIONAL_PROPERTIES, target); - TableUtils.removeTblProperty(MirrorConf.BUCKETING_VERSION, target); + TableUtils.removeTblProperty(TRANSACTIONAL, target); + TableUtils.removeTblProperty(TRANSACTIONAL_PROPERTIES, target); + TableUtils.removeTblProperty(BUCKETING_VERSION, target); } if (copySpec.isMakeExternal()) @@ -1772,12 +1819,12 @@ public Boolean buildTableSchema(CopySpec copySpec) { if (copySpec.getTakeOwnership()) { if (TableUtils.isACID(source)) { if (copySpec.getTarget() == Environment.TRANSFER) { - target.addProperty(MirrorConf.EXTERNAL_TABLE_PURGE, "true"); + target.addProperty(EXTERNAL_TABLE_PURGE, "true"); } else if (config.getMigrateACID().isDowngrade() && !config.isNoPurge()) { - target.addProperty(MirrorConf.EXTERNAL_TABLE_PURGE, "true"); + target.addProperty(EXTERNAL_TABLE_PURGE, "true"); } } else { - target.addProperty(MirrorConf.EXTERNAL_TABLE_PURGE, "true"); + target.addProperty(EXTERNAL_TABLE_PURGE, "true"); } } @@ -1796,9 +1843,9 @@ public Boolean buildTableSchema(CopySpec copySpec) { // TableUtils.upsertTblProperty(MirrorConf.DOWNGRADED_FROM_ACID, Boolean.TRUE.toString(), target); converted = TableUtils.makeExternal(target); if (!config.isNoPurge()) { - target.addProperty(MirrorConf.EXTERNAL_TABLE_PURGE, Boolean.TRUE.toString()); + target.addProperty(EXTERNAL_TABLE_PURGE, Boolean.TRUE.toString()); } - target.addProperty(MirrorConf.DOWNGRADED_FROM_ACID, Boolean.TRUE.toString()); + target.addProperty(DOWNGRADED_FROM_ACID, Boolean.TRUE.toString()); } if (TableUtils.removeBuckets(target, config.getMigrateACID().getArtificialBucketThreshold())) { @@ -1810,7 +1857,7 @@ public Boolean buildTableSchema(CopySpec copySpec) { // 2. Set mirror stage one flag if (copySpec.getTarget() == Environment.RIGHT) { - target.addProperty(MirrorConf.HMS_MIRROR_METADATA_FLAG, df.format(new Date())); + target.addProperty(HMS_MIRROR_METADATA_FLAG, df.format(new Date())); } // 3. Rename table @@ -1836,11 +1883,11 @@ public Boolean buildTableSchema(CopySpec copySpec) { TableUtils.removeTblProperty("last_modified_time", target); // 6. 
Set 'discover.partitions' if config and non-acid - if (config.getCluster(copySpec.getTarget()).getPartitionDiscovery().getAuto() && TableUtils.isPartitioned(target.getName(), target.getDefinition())) { + if (config.getCluster(copySpec.getTarget()).getPartitionDiscovery().getAuto() && TableUtils.isPartitioned(target)) { if (converted) { - target.addProperty(MirrorConf.DISCOVER_PARTITIONS, Boolean.TRUE.toString()); + target.addProperty(DISCOVER_PARTITIONS, Boolean.TRUE.toString()); } else if (TableUtils.isExternal(target)) { - target.addProperty(MirrorConf.DISCOVER_PARTITIONS, Boolean.TRUE.toString()); + target.addProperty(DISCOVER_PARTITIONS, Boolean.TRUE.toString()); } } @@ -1909,10 +1956,10 @@ public Boolean buildTableSchema(CopySpec copySpec) { switch (copySpec.getTarget()) { case TRANSFER: - TableUtils.upsertTblProperty(MirrorConf.HMS_MIRROR_TRANSFER_TABLE, "true", target); + TableUtils.upsertTblProperty(HMS_MIRROR_TRANSFER_TABLE, "true", target); break; case SHADOW: - TableUtils.upsertTblProperty(MirrorConf.HMS_MIRROR_SHADOW_TABLE, "true", target); + TableUtils.upsertTblProperty(HMS_MIRROR_SHADOW_TABLE, "true", target); break; } // 6. Go through the features, if any. @@ -1947,14 +1994,14 @@ public Boolean buildTableSchema(CopySpec copySpec) { } if (!copySpec.getTakeOwnership() && config.getDataStrategy() != DataStrategy.STORAGE_MIGRATION) { - TableUtils.removeTblProperty(MirrorConf.EXTERNAL_TABLE_PURGE, target); + TableUtils.removeTblProperty(EXTERNAL_TABLE_PURGE, target); } if (config.getCluster(copySpec.getTarget()).getLegacyHive() && config.getDataStrategy() != DataStrategy.STORAGE_MIGRATION) { // remove newer flags; - TableUtils.removeTblProperty(MirrorConf.EXTERNAL_TABLE_PURGE, target); - TableUtils.removeTblProperty(MirrorConf.DISCOVER_PARTITIONS, target); - TableUtils.removeTblProperty(MirrorConf.BUCKETING_VERSION, target); + TableUtils.removeTblProperty(EXTERNAL_TABLE_PURGE, target); + TableUtils.removeTblProperty(DISCOVER_PARTITIONS, target); + TableUtils.removeTblProperty(BUCKETING_VERSION, target); } } else if (TableUtils.isView(target)) { diff --git a/src/main/java/com/cloudera/utils/hadoop/hms/mirror/TablePropertyVars.java b/src/main/java/com/cloudera/utils/hadoop/hms/mirror/TablePropertyVars.java new file mode 100644 index 00000000..53ae8c5c --- /dev/null +++ b/src/main/java/com/cloudera/utils/hadoop/hms/mirror/TablePropertyVars.java @@ -0,0 +1,54 @@ +package com.cloudera.utils.hadoop.hms.mirror; + +public interface TablePropertyVars { + + /* +METADATA Transfer Flag + */ + String HMS_MIRROR_METADATA_FLAG = "hms-mirror_Metadata_Stage1"; + String HMS_MIRROR_CONVERTED_FLAG = "hms-mirror_Converted"; + // Data Migration Flags + /* + Didn't move data (cloud storage scenario), but RIGHT cluster managed data flags + converted to upper cluster AND reset/unset in lower cluster. + */ + String HMS_MIRROR_STORAGE_OWNER_FLAG = "hms-mirror_Storage_OWNER_Stage2"; + /* + Migrate Metadata only and use a temp table in the RIGHT cluster with a reference to the data + in the LEFT cluster and USE SQL to migrate the data from the temp table to a target table + in the RIGHT cluster that matches the LEFT cluster relative location. + */ + String HMS_MIRROR_STORAGE_SQL_FLAG = "hms-mirror_Storage_SQL_Stage2"; + /* + Using Hive EXPORT to build a transferable package of the schema and data in the lower cluster. + In the RIGHT cluster, with access to the LEFT cluster EXPORT location, IMPORT the table and data + into the RIGHT cluster.
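+ (The EXPORT location must be readable from the RIGHT cluster for the IMPORT to succeed.)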
+ Purge/Managed Adjustments: TBD + */ + String HMS_MIRROR_STORAGE_IMPORT_FLAG = "hms-mirror_Storage_IMPORT_Stage2"; + /* + A mix of SQL and IMPORT. Using table characteristics like partition count and data sizes to + determine whether to use SQL or EXPORT/IMPORT to move data. + Purge/Managed Adjustments: TBD + */ + String HMS_MIRROR_STORAGE_HYBRID_FLAG = "hms-mirror_Storage_HYBRID_Stage2"; + /* + Build the schema in the upper cluster via the Metadata Mirror process. Then an 'external' process + uses 'distcp' to migrate the data in the background. + Requires EXTERNAL intervention. + */ + String HMS_MIRROR_STORAGE_DISTCP_FLAG = "hms-mirror_Storage_DISTCP_Stage2"; + String HMS_MIRROR_LEGACY_MANAGED_FLAG = "hms-mirror_LegacyManaged"; + String DOWNGRADED_FROM_ACID = "downgraded_from_acid"; + String DISCOVER_PARTITIONS = "discover.partitions"; + String TRANSLATED_TO_EXTERNAL = "TRANSLATED_TO_EXTERNAL"; + String EXTERNAL_TABLE_PURGE = "external.table.purge"; + String TRANSACTIONAL = "transactional"; + String TRANSACTIONAL_PROPERTIES = "transactional_properties"; + String HMS_MIRROR_TRANSFER_TABLE = "hms-mirror_transfer_table"; + String HMS_MIRROR_SHADOW_TABLE = "hms-mirror_shadow_table"; + String HMS_STORAGE_MIGRATION_FLAG = "hms-mirror-STORAGE_MIGRATED"; + String BUCKETING_VERSION = "bucketing_version"; + String AVRO_SCHEMA_URL_KEY = "avro.schema.url"; + +} diff --git a/src/main/java/com/cloudera/utils/hadoop/hms/mirror/feature/BadParquetDefFeature.java b/src/main/java/com/cloudera/utils/hadoop/hms/mirror/feature/BadParquetDefFeature.java index 0a5471b0..e919a37d 100644 --- a/src/main/java/com/cloudera/utils/hadoop/hms/mirror/feature/BadParquetDefFeature.java +++ b/src/main/java/com/cloudera/utils/hadoop/hms/mirror/feature/BadParquetDefFeature.java @@ -59,7 +59,10 @@ public Boolean applicable(List schema) { // Need to check for proper spark defined ROW_FORMAT_SERDE // When present, don't fix this. It breaks Spark SQL. int rfsIdx = indexOf(schema, ROW_FORMAT_SERDE); - if (rfsIdx > 0) { + if (rfsIdx == -1) { + // Missing ROW FORMAT SERDE, so we need to fix it. + rtn = Boolean.TRUE; + } else if (rfsIdx > 0) { if (!schema.get(rfsIdx +1).trim().equals(ROW_FORMAT_SERDE_CLASS)) { rtn = Boolean.TRUE; } diff --git a/src/main/java/com/cloudera/utils/hadoop/hms/stage/GetTableMetadata.java b/src/main/java/com/cloudera/utils/hadoop/hms/stage/GetTableMetadata.java index 9ef46ed0..a3f6f4a3 100644 --- a/src/main/java/com/cloudera/utils/hadoop/hms/stage/GetTableMetadata.java +++ b/src/main/java/com/cloudera/utils/hadoop/hms/stage/GetTableMetadata.java @@ -62,13 +62,13 @@ public ReturnStatus doit() { ReturnStatus rtn = new ReturnStatus(); LOG.debug("Getting table definition for: " + dbMirror.getName() + "."
+ tblMirror.getName()); try { - config.getCluster(Environment.LEFT).getTableDefinition(config, dbMirror.getName(), tblMirror); + config.getCluster(Environment.LEFT).getTableDefinition(dbMirror.getName(), tblMirror); switch (config.getDataStrategy()) { case DUMP: successful = Boolean.TRUE; break; default: - config.getCluster(Environment.RIGHT).getTableDefinition(config, config.getResolvedDB(dbMirror.getName()), tblMirror); + config.getCluster(Environment.RIGHT).getTableDefinition(config.getResolvedDB(dbMirror.getName()), tblMirror); } } catch (SQLException throwables) { successful = Boolean.FALSE; diff --git a/src/main/java/com/cloudera/utils/hadoop/hms/stage/Setup.java b/src/main/java/com/cloudera/utils/hadoop/hms/stage/Setup.java index 23fa7cab..af6e3818 100644 --- a/src/main/java/com/cloudera/utils/hadoop/hms/stage/Setup.java +++ b/src/main/java/com/cloudera/utils/hadoop/hms/stage/Setup.java @@ -22,12 +22,17 @@ import org.apache.log4j.Logger; import java.math.RoundingMode; +import java.sql.Connection; +import java.sql.ResultSet; import java.sql.SQLException; +import java.sql.Statement; import java.text.DecimalFormat; import java.util.*; import java.util.concurrent.*; +import java.util.regex.Matcher; import static com.cloudera.utils.hadoop.hms.mirror.MessageCode.*; +import static com.cloudera.utils.hadoop.hms.mirror.MirrorConf.SHOW_DATABASES; /* Using the config, go through the databases and tables and collect the current states. @@ -51,6 +56,45 @@ public Boolean collect() { Date startTime = new Date(); LOG.info("GATHERING METADATA: Start Processing for databases: " + Arrays.toString((config.getDatabases()))); + // Check dbRegEx + if (config.getFilter().getDbRegEx() != null) { + // Look for the dbRegEx. + Connection conn = null; + Statement stmt = null; + List databases = new ArrayList(); + try { + conn = config.getCluster(Environment.LEFT).getConnection(); + if (conn != null) { + stmt = conn.createStatement(); + ResultSet rs = stmt.executeQuery(SHOW_DATABASES); + while (rs.next()) { + String db = rs.getString(1); + Matcher matcher = config.getFilter().getDbFilterPattern().matcher(db); + if (matcher.find()) { + databases.add(db); + } + } + String[] dbs = databases.toArray(new String[0]); + config.setDatabases(dbs); + } + } catch (SQLException se) { + // Issue + LOG.error("Issue getting databases for dbRegEx"); + } finally { + if (conn != null) { + try { + conn.close(); + } catch (SQLException e) { + throw new RuntimeException(e); + } + } + } + } + + if (config.getDatabases() == null || config.getDatabases().length == 0) { + throw new RuntimeException("No databases specified, or none found matching the dbRegEx"); + } + List<Future<ReturnStatus>> gtf = new ArrayList<Future<ReturnStatus>>(); for (String database : config.getDatabases()) { DBMirror dbMirror = conversion.addDatabase(database); diff --git a/src/main/java/com/cloudera/utils/hadoop/hms/stage/Transfer.java b/src/main/java/com/cloudera/utils/hadoop/hms/stage/Transfer.java index 7b88632b..86177eca 100644 --- a/src/main/java/com/cloudera/utils/hadoop/hms/stage/Transfer.java +++ b/src/main/java/com/cloudera/utils/hadoop/hms/stage/Transfer.java @@ -16,6 +16,7 @@ package com.cloudera.utils.hadoop.hms.stage; +import com.cloudera.utils.hadoop.hms.Context; import com.cloudera.utils.hadoop.hms.mirror.*; import com.cloudera.utils.hadoop.HadoopSession; import com.cloudera.utils.hadoop.hms.util.TableUtils; @@ -29,6 +30,9 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; +import static com.cloudera.utils.hadoop.hms.mirror.SessionVars.*; +import static
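One subtlety in the dbRegEx handling above: Matcher.find() matches anywhere inside the database name, not against the whole name. A standalone illustration (the pattern and database names here are invented):

    // find() is a substring match, so the pattern "web" selects both names below.
    Pattern p = Pattern.compile("web");
    boolean a = p.matcher("web_logs").find();    // true
    boolean b = p.matcher("my_web_db").find();   // true
    // Anchor the expression to limit the match to whole names.
    boolean c = Pattern.compile("^web$").matcher("web_logs").find();  // false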
com.cloudera.utils.hadoop.hms.mirror.TablePropertyVars.EXTERNAL_TABLE_PURGE; + public class Transfer implements Callable { private static final Logger LOG = LogManager.getLogger(Transfer.class); public static Pattern protocolNSPattern = Pattern.compile("(^.*://)([a-zA-Z0-9](?:(?:[a-zA-Z0-9-]*|(? tableDefinition) public static String getTableNameFromDefinition(List tableDefinition) { String tableName = null; - for (String line: tableDefinition) { + for (String line : tableDefinition) { LOG.debug("Tablename Check: " + line); if (line.contains("CREATE")) { Matcher matcher = tableCreatePattern.matcher(line); if (matcher.find()) { - if (matcher.groupCount() == 2) { - tableName = matcher.group(1); - } else if (matcher.groupCount() == 3) { - tableName = matcher.group(2); + if (matcher.groupCount() == 3) { + if (matcher.group(3) == null) + tableName = matcher.group(2); + else + tableName = matcher.group(1); + } else if (matcher.groupCount() == 2) { + if (matcher.group(2) == null) + tableName = matcher.group(1); + else + tableName = matcher.group(2); } break; } else { @@ -110,7 +117,7 @@ public static Boolean doesTableNameMatchDirectoryName(List tableDefiniti public static Boolean doesTableNameMatchDirectoryName(String tableName, List tableDefinition) { String location = getLocation(tableName, tableDefinition); int idx = location.lastIndexOf('/'); - String dirName = location.substring(idx+1); + String dirName = location.substring(idx + 1); if (tableName.equals(dirName)) { return Boolean.TRUE; } else { @@ -130,7 +137,7 @@ public static String getSerdePath(String tableName, List tableDefinition String[] pathLine = sprop.split("="); if (pathLine.length == 2) { if (pathLine[1].startsWith("'")) { - location = pathLine[1].substring(1, pathLine[1].length()-2); + location = pathLine[1].substring(1, pathLine[1].length() - 2); } } break; @@ -204,7 +211,7 @@ public static Boolean updateAVROSchemaLocation(String tableName, List ta if (newLocation != null) { for (String line : tableDefinition) { - if (line.contains(MirrorConf.AVRO_SCHEMA_URL_KEY)) { + if (line.contains(AVRO_SCHEMA_URL_KEY)) { int lineIdx = tableDefinition.indexOf(line); String[] parts = line.split("="); LOG.debug("Old AVRO Schema location: " + parts[1]); @@ -357,26 +364,40 @@ CLUSTERED BY ( return rtn; } - public static Boolean isManaged(String tableName, List tableDefinition) { - Boolean rtn = Boolean.FALSE; - LOG.debug("Checking if table '" + tableName + "' is 'managed'"); - if (tableDefinition == null) { - throw new RuntimeException("Table definition for " + tableName + " is null."); + public static SerdeType getSerdeType(EnvironmentTable envTable) { + LOG.trace("Getting table location data for: " + envTable.getName()); + String serdeClass = null; + SerdeType rtn = SerdeType.UNKNOWN; + int locIdx = envTable.getDefinition().indexOf(ROW_FORMAT_SERDE); + if (locIdx > 0) { + serdeClass = envTable.getDefinition().get(locIdx + 1).trim().replace("'", ""); } - if (tableDefinition != null) { - for (String line : tableDefinition) { - if (line != null && line.startsWith(CREATE_TABLE)) { - rtn = Boolean.TRUE; + if (serdeClass != null) { + for (SerdeType serdeType : SerdeType.values()) { + if (serdeType.isType(serdeClass)) { + rtn = serdeType; break; } } } + envTable.getStatistics().put(FILE_FORMAT, rtn); return rtn; } public static Boolean isManaged(EnvironmentTable envTable) { Boolean rtn = Boolean.FALSE; - rtn = isManaged(envTable.getName(), envTable.getDefinition()); + LOG.debug("Checking if table '" + envTable.getName() + "' is 'managed'"); + if 
(envTable.getDefinition() == null) { + throw new RuntimeException("Table definition for " + envTable.getName() + " is null."); + } + if (envTable.getDefinition() != null) { + for (String line : envTable.getDefinition()) { + if (line != null && line.startsWith(CREATE_TABLE)) { + rtn = Boolean.TRUE; + break; + } + } + } return rtn; } @@ -440,30 +461,26 @@ public static Boolean prefixTableName(String tableName, String prefix, List tableDefinition) { + public static Boolean makeExternal(EnvironmentTable envTable) { Boolean rtn = Boolean.FALSE; - if (isManaged(tableName, tableDefinition)) { - LOG.debug("Converting table: " + tableName + " to EXTERNAL"); - for (String line : tableDefinition) { + if (isManaged(envTable)) { + LOG.debug("Converting table: " + envTable.getName() + " to EXTERNAL"); + for (String line : envTable.getDefinition()) { if (line.startsWith(CREATE_TABLE)) { - int indexCT = tableDefinition.indexOf(line); + int indexCT = envTable.getDefinition().indexOf(line); String cet = line.replace(CREATE_TABLE, CREATE_EXTERNAL_TABLE); - tableDefinition.set(indexCT, cet); + envTable.getDefinition().set(indexCT, cet); rtn = Boolean.TRUE; } } // If ACID, remove transactional property to complete conversion to external. - removeTblProperty(MirrorConf.TRANSACTIONAL, tableDefinition); - removeTblProperty(MirrorConf.TRANSACTIONAL_PROPERTIES, tableDefinition); - removeTblProperty(MirrorConf.BUCKETING_VERSION, tableDefinition); + removeTblProperty(TRANSACTIONAL, envTable.getDefinition()); + removeTblProperty(TRANSACTIONAL_PROPERTIES, envTable.getDefinition()); + removeTblProperty(BUCKETING_VERSION, envTable.getDefinition()); } return rtn; } - public static Boolean makeExternal(EnvironmentTable envTable) { - return makeExternal(envTable.getName(), envTable.getDefinition()); - } - public static Boolean fixTableDefinition(EnvironmentTable environmentTable) { return fixTableDefinition(environmentTable.getDefinition()); } @@ -475,10 +492,10 @@ public static Boolean fixTableDefinition(List tableDefinition) { // Remove trailing ',' from TBL_PROPERTIES. int tpIdx = tableDefinition.indexOf(TBL_PROPERTIES); if (tpIdx != -1) { - boolean hangingParen = tableDefinition.get(tableDefinition.size()-1).trim().equals(")")?Boolean.TRUE:Boolean.FALSE; - int checkLineNum = tableDefinition.size()-1; + boolean hangingParen = tableDefinition.get(tableDefinition.size() - 1).trim().equals(")") ? Boolean.TRUE : Boolean.FALSE; + int checkLineNum = tableDefinition.size() - 1; if (hangingParen) { - checkLineNum = tableDefinition.size()-2; + checkLineNum = tableDefinition.size() - 2; } for (int i = tpIdx + 1; i < tableDefinition.size() - 1; i++) { String line = tableDefinition.get(i).trim(); @@ -486,7 +503,7 @@ public static Boolean fixTableDefinition(List tableDefinition) { if (i >= checkLineNum) { if (line.endsWith(",")) { // need to remove comma. - String newLine = line.substring(0,line.length()-1); + String newLine = line.substring(0, line.length() - 1); // tableDefinition.remove(i); // Replace without comma tableDefinition.set(i, newLine); @@ -501,13 +518,13 @@ public static Boolean fixTableDefinition(List tableDefinition) { /* Check that its a Hive table and not a connector like HBase, Kafka, RDBMS, etc. 
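The net effect of the reworked makeExternal is easiest to see on a tiny definition. A sketch, assuming EnvironmentTable exposes plain bean setters for its name and definition (the table here is invented):

    // Before: CREATE TABLE plus 'transactional'='true'.
    List<String> def = new ArrayList<>(Arrays.asList(
            "CREATE TABLE `acid_01`(",
            "  `id` int)",
            "TBLPROPERTIES (",
            "  'transactional'='true')"));
    EnvironmentTable env = new EnvironmentTable();
    env.setName("acid_01");
    env.setDefinition(def);
    TableUtils.makeExternal(env);
    // After: the first line reads "CREATE EXTERNAL TABLE `acid_01`(" and the
    // transactional/bucketing properties have been stripped from the definition.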
*/ - public static Boolean isHiveNative(String tableName, List tableDefinition) { + public static Boolean isHiveNative(EnvironmentTable envTable) { Boolean rtn = Boolean.FALSE; - LOG.debug("Checking if table '" + tableName + "' is 'native' (not a connector [HBase, Kafka, etc])"); - if (tableDefinition == null) { - throw new RuntimeException("Table definition for " + tableName + " is null."); + LOG.debug("Checking if table '" + envTable.getName() + "' is 'native' (not a connector [HBase, Kafka, etc])"); + if (envTable.getDefinition() == null) { + throw new RuntimeException("Table definition for " + envTable.getName() + " is null."); } - for (String line : tableDefinition) { + for (String line : envTable.getDefinition()) { if (line != null && line.trim().startsWith(LOCATION)) { rtn = Boolean.TRUE; break; @@ -516,23 +533,10 @@ public static Boolean isHiveNative(String tableName, List tableDefinitio return rtn; } - /* - Check that its a Hive table and not a connector like HBase, Kafka, RDBMS, etc. - */ - public static Boolean isHiveNative(EnvironmentTable envTable) { - Boolean rtn = Boolean.FALSE; - rtn = isHiveNative(envTable.getName(), envTable.getDefinition()); - return rtn; - } - public static Boolean isExternal(EnvironmentTable envTable) { - return isExternal(envTable.getName(), envTable.getDefinition()); - } - - public static Boolean isExternal(String tableName, List tableDefinition) { Boolean rtn = Boolean.FALSE; - LOG.debug("Checking if table '" + tableName + "' is 'external'"); - for (String line : tableDefinition) { + LOG.debug("Checking if table '" + envTable.getName() + "' is 'external'"); + for (String line : envTable.getDefinition()) { if (line.startsWith(CREATE_EXTERNAL_TABLE)) { rtn = Boolean.TRUE; break; @@ -541,24 +545,24 @@ public static Boolean isExternal(String tableName, List tableDefinition) return rtn; } - public static Boolean isHive3Standard(String tableName, List tableDefinition) { - if (isManaged(tableName, tableDefinition) && !isACID(tableName, tableDefinition)) { + public static Boolean isHive3Standard(EnvironmentTable envTable) { + if (isManaged(envTable) && !isACID(envTable)) { return Boolean.FALSE; } else { return Boolean.TRUE; } } - public static Boolean isHMSConverted(String tableName, List tableDefinition) { + public static Boolean isHMSConverted(EnvironmentTable envTable) { Boolean rtn = Boolean.FALSE; - LOG.debug("Checking if table '" + tableName + "' was converted by 'hms-mirror'"); - if (tableDefinition == null) { - throw new RuntimeException("Table definition for " + tableName + " is null."); + LOG.debug("Checking if table '" + envTable.getName() + "' was converted by 'hms-mirror'"); + if (envTable.getDefinition() == null) { + throw new RuntimeException("Table definition for " + envTable.getName() + " is null."); } - for (String line : tableDefinition) { + for (String line : envTable.getDefinition()) { if (line != null) { String tline = line.trim(); - if (tline.toLowerCase().startsWith("'" + MirrorConf.HMS_MIRROR_CONVERTED_FLAG.toLowerCase())) { + if (tline.toLowerCase().startsWith("'" + HMS_MIRROR_CONVERTED_FLAG.toLowerCase())) { String[] prop = tline.split("="); if (prop.length == 2) { // Stripe the quotes @@ -579,16 +583,12 @@ public static Boolean isHMSConverted(String tableName, List tableDefinit } public static Boolean isView(EnvironmentTable envTable) { - return isView(envTable.getName(), envTable.getDefinition()); - } - - public static Boolean isView(String name, List definition) { Boolean rtn = Boolean.FALSE; - if (definition == null) { - throw 
new RuntimeException("Definition for " + name + " is null."); + if (envTable.getDefinition() == null) { + throw new RuntimeException("Definition for " + envTable.getName() + " is null."); } - for (String line : definition) { + for (String line : envTable.getDefinition()) { if (line.trim().startsWith(CREATE_VIEW)) { rtn = Boolean.TRUE; break; @@ -598,20 +598,16 @@ public static Boolean isView(String name, List definition) { } public static Boolean isACID(EnvironmentTable envTable) { - return isACID(envTable.getName(), envTable.getDefinition()); - } - - public static Boolean isACID(String tableName, List tableDefinition) { Boolean rtn = Boolean.FALSE; - LOG.debug("Checking if table '" + tableName + "' is 'transactional(ACID)'"); - if (tableDefinition == null) { - throw new RuntimeException("Table definition for " + tableName + " is null."); + LOG.debug("Checking if table '" + envTable.getName() + "' is 'transactional(ACID)'"); + if (envTable.getDefinition() == null) { + throw new RuntimeException("Table definition for " + envTable.getName() + " is null."); } - if (isManaged(tableName, tableDefinition)) { - for (String line : tableDefinition) { + if (isManaged(envTable)) { + for (String line : envTable.getDefinition()) { if (line != null) { String tline = line.trim(); - if (tline.toLowerCase().startsWith("'" + MirrorConf.TRANSACTIONAL)) { + if (tline.toLowerCase().startsWith("'" + TRANSACTIONAL)) { String[] prop = tline.split("="); if (prop.length == 2) { // Stripe the quotes @@ -622,6 +618,7 @@ public static Boolean isACID(String tableName, List tableDefinition) { } if (Boolean.valueOf(value)) { rtn = Boolean.TRUE; + envTable.getStatistics().put(TRANSACTIONAL, Boolean.TRUE); } } break; @@ -633,20 +630,16 @@ public static Boolean isACID(String tableName, List tableDefinition) { } public static Boolean isExternalPurge(EnvironmentTable envTable) { - return isExternalPurge(envTable.getName(), envTable.getDefinition()); - } - - public static Boolean isExternalPurge(String tableName, List tableDefinition) { Boolean rtn = Boolean.FALSE; - LOG.debug("Checking if table '" + tableName + "' is an 'External' Purge table"); - if (tableDefinition == null) { - throw new RuntimeException("Table definition for " + tableName + " is null."); + LOG.debug("Checking if table '" + envTable.getName() + "' is an 'External' Purge table"); + if (envTable.getDefinition() == null) { + throw new RuntimeException("Table definition for " + envTable.getName() + " is null."); } - if (isExternal(tableName, tableDefinition)) { - for (String line : tableDefinition) { + if (isExternal(envTable)) { + for (String line : envTable.getDefinition()) { if (line != null) { String tline = line.trim(); - if (tline.toLowerCase().startsWith("'" + MirrorConf.EXTERNAL_TABLE_PURGE)) { + if (tline.toLowerCase().startsWith("'" + EXTERNAL_TABLE_PURGE)) { String[] prop = tline.split("="); if (prop.length == 2) { // Stripe the quotes @@ -657,6 +650,7 @@ public static Boolean isExternalPurge(String tableName, List tableDefini } if (Boolean.valueOf(value)) { rtn = Boolean.TRUE; + envTable.getStatistics().put(EXTERNAL_TABLE_PURGE, Boolean.TRUE); } } break; @@ -694,13 +688,14 @@ public static String tableFieldsFingerPrint(List tableDef) { return hashText; } - public static Boolean isPartitioned(String tableName, List tableDefinition) { + + public static Boolean isPartitioned(EnvironmentTable envTable) { Boolean rtn = Boolean.FALSE; - LOG.debug("Checking if table '" + tableName + "' is 'Partitioned'"); - if (tableDefinition == null) { + 
LOG.debug("Checking if table '" + envTable.getName() + "' is 'Partitioned'"); + if (envTable.getDefinition() == null) { return rtn; } - for (String line : tableDefinition) { + for (String line : envTable.getDefinition()) { if (line != null && line.startsWith(PARTITIONED_BY)) { rtn = Boolean.TRUE; break; @@ -710,17 +705,13 @@ public static Boolean isPartitioned(String tableName, List tableDefiniti } public static Boolean isAVROSchemaBased(EnvironmentTable envTable) { - return isAVROSchemaBased(envTable.getName(), envTable.getDefinition()); - } - - public static Boolean isAVROSchemaBased(String tableName, List tableDefinition) { Boolean rtn = Boolean.FALSE; - LOG.debug("Checking if table '" + tableName + "' is an AVRO table using a schema file in hcfs."); - if (tableDefinition == null) { - throw new RuntimeException("Table definition for " + tableName + " is null."); + LOG.debug("Checking if table '" + envTable.getName() + "' is an AVRO table using a schema file in hcfs."); + if (envTable.getDefinition() == null) { + throw new RuntimeException("Table definition for " + envTable.getName() + " is null."); } - for (String line : tableDefinition) { - if (line != null && line.contains(MirrorConf.AVRO_SCHEMA_URL_KEY)) { + for (String line : envTable.getDefinition()) { + if (line != null && line.contains(AVRO_SCHEMA_URL_KEY)) { rtn = Boolean.TRUE; break; } @@ -736,7 +727,7 @@ public static String getAVROSchemaPath(String tblName, List tblDefinitio String rtn = null; LOG.debug("Retrieving AVRO Schema Path for " + tblName); for (String line : tblDefinition) { - if (line.contains(MirrorConf.AVRO_SCHEMA_URL_KEY)) { + if (line.contains(AVRO_SCHEMA_URL_KEY)) { try { String[] parts = line.split("="); if (parts.length > 2) { @@ -760,20 +751,20 @@ public static String getAVROSchemaPath(String tblName, List tblDefinitio return rtn; } - public static Boolean isLegacyManaged(Cluster cluster, String tableName, List tableDefinition) { + public static Boolean isLegacyManaged(Cluster cluster, EnvironmentTable envTable) { Boolean rtn = Boolean.FALSE; - if (isManaged(tableName, tableDefinition) && cluster.getLegacyHive() && !isACID(tableName, tableDefinition)) { + if (isManaged(envTable) && cluster.getLegacyHive() && !isACID(envTable)) { rtn = Boolean.TRUE; } return rtn; } - public static Boolean isHMSLegacyManaged(String tableName, List tableDefinition) { + public static Boolean isHMSLegacyManaged(EnvironmentTable envTable) { Boolean rtn = Boolean.FALSE; - LOG.debug("Checking if table '" + tableName + "' was tagged as Legacy Managed by 'hms-mirror'"); - for (String line : tableDefinition) { + LOG.debug("Checking if table '" + envTable.getName() + "' was tagged as Legacy Managed by 'hms-mirror'"); + for (String line : envTable.getDefinition()) { String tline = line.trim(); - if (tline.toLowerCase().startsWith("'" + MirrorConf.HMS_MIRROR_LEGACY_MANAGED_FLAG.toLowerCase())) { + if (tline.toLowerCase().startsWith("'" + HMS_MIRROR_LEGACY_MANAGED_FLAG.toLowerCase())) { String[] prop = tline.split("="); if (prop.length == 2) { // Stripe the quotes @@ -844,9 +835,9 @@ public static void upsertTblProperty(String key, String value, List tabl public static StorageType getStorageType(List tblDef) { // String rtn = null; int tpIdx = tblDef.indexOf("ROW FORMAT SERDE"); - String rowformatSerde = tblDef.get(tpIdx+1); + String rowformatSerde = tblDef.get(tpIdx + 1); tpIdx = tblDef.indexOf("STORED AS INPUTFORMAT"); - String inputFormat = tblDef.get(tpIdx+1); + String inputFormat = tblDef.get(tpIdx + 1); StorageType storageType = 
StorageType.from(rowformatSerde, inputFormat); return storageType; @@ -888,7 +879,7 @@ public static String getTblProperty(String key, List tblDef) { } // Remove Comma, if present. if (rtn != null && rtn.endsWith(",")) - rtn = rtn.substring(0, rtn.length()-1); + rtn = rtn.substring(0, rtn.length() - 1); return rtn; } diff --git a/src/test/java/com/cloudera/utils/hadoop/hms/DataState.java b/src/test/java/com/cloudera/utils/hadoop/hms/DataState.java index e4fb69d4..ebb74574 100644 --- a/src/test/java/com/cloudera/utils/hadoop/hms/DataState.java +++ b/src/test/java/com/cloudera/utils/hadoop/hms/DataState.java @@ -17,6 +17,16 @@ package com.cloudera.utils.hadoop.hms; +import com.cloudera.utils.hadoop.hms.mirror.Config; +import com.cloudera.utils.hadoop.hms.mirror.Environment; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.SerializationFeature; +import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; +import org.apache.commons.io.FileUtils; + +import java.io.File; +import java.io.IOException; +import java.nio.charset.StandardCharsets; import java.text.SimpleDateFormat; import java.util.Date; import java.util.Map; @@ -27,9 +37,9 @@ public class DataState { private static DataState instance = null; protected String configuration = null; - // protected Boolean dataCreated = Boolean.FALSE; - protected Map dataCreated = new TreeMap(); + protected Map<String, Map<String, Boolean>> dataCreated = new TreeMap<>(); +// protected Map dataCreated = new TreeMap(); private Boolean skipAdditionDataCreation = Boolean.FALSE; protected Boolean execute = Boolean.FALSE; @@ -59,9 +69,15 @@ public String getConfiguration() { return configuration; } - public void setConfiguration(String configuration) { + public void setConfiguration(String configuration) throws IOException { this.configuration = System.getProperty("user.home") + "/.hms-mirror/cfg/" + configuration; + File cfgFile = new File(this.configuration); + ObjectMapper mapper = new ObjectMapper(new YAMLFactory()); + mapper.enable(SerializationFeature.WRITE_DATES_AS_TIMESTAMPS); + String yamlCfgFile = FileUtils.readFileToString(cfgFile, StandardCharsets.UTF_8); + Config cfg = mapper.readerFor(Config.class).readValue(yamlCfgFile); + Context.getInstance().setConfig(cfg); } public String getTable_filter() { @@ -109,8 +125,16 @@ public void setPopulate(Boolean populate) { public Boolean isDataCreated(String dataset) { Boolean rtn = Boolean.FALSE; if (!skipAdditionDataCreation) { - if (dataCreated.containsKey(dataset)) - rtn = dataCreated.get(dataset); + Config cfg = Context.getInstance().getConfig(); + String namespace = cfg.getCluster(Environment.LEFT).getHcfsNamespace(); + Map nsCreatedDataset = dataCreated.get(namespace); + if (nsCreatedDataset == null) { + nsCreatedDataset = new TreeMap(); + dataCreated.put(namespace, nsCreatedDataset); + } + if (nsCreatedDataset.containsKey(dataset)) { + rtn = Boolean.TRUE; + } } else { rtn = Boolean.TRUE; } @@ -125,9 +149,15 @@ public void setCleanUp(Boolean cleanUp) { this.cleanUp = cleanUp; } - public void setDataCreated(String dataset, Boolean dataCreated) { - this.dataCreated.put(dataset, dataCreated); -// this.dataCreated = dataCreated; + public void setDataCreated(String dataset, Boolean dataCreatedFlag) { + Config cfg = Context.getInstance().getConfig(); + String namespace = cfg.getCluster(Environment.LEFT).getHcfsNamespace(); + Map nsCreatedDataset = dataCreated.get(namespace); + if (nsCreatedDataset == null) { + nsCreatedDataset = new TreeMap(); + dataCreated.put(namespace, nsCreatedDataset); + } +
nsCreatedDataset.put(dataset, dataCreatedFlag); } public void setExecute(Boolean execute) { diff --git a/src/test/java/com/cloudera/utils/hadoop/hms/EncryptValidationTest.java b/src/test/java/com/cloudera/utils/hadoop/hms/EncryptValidationTest.java index 368d952b..e68b7125 100644 --- a/src/test/java/com/cloudera/utils/hadoop/hms/EncryptValidationTest.java +++ b/src/test/java/com/cloudera/utils/hadoop/hms/EncryptValidationTest.java @@ -17,10 +17,10 @@ package com.cloudera.utils.hadoop.hms; +import com.cloudera.utils.hadoop.hms.datastrategy.MirrorTestBase; import com.cloudera.utils.hadoop.hms.mirror.MessageCode; import com.cloudera.utils.hadoop.hms.mirror.MirrorConf; import com.cloudera.utils.hadoop.hms.mirror.Pair; -import org.junit.After; import org.junit.AfterClass; import org.junit.Before; import org.junit.Test; @@ -29,8 +29,6 @@ import java.util.ArrayList; import java.util.List; -import static com.cloudera.utils.hadoop.hms.TestSQL.*; -import static com.cloudera.utils.hadoop.hms.TestSQL.TBL_INSERT; import static org.junit.Assert.assertTrue; import static org.junit.Assert.assertFalse; @@ -39,72 +37,16 @@ public class EncryptValidationTest extends MirrorTestBase { private static final String PKEY = "test"; // private String PW = "" - @Before - public void setUp() throws Exception { - super.setUp(); - DataState.getInstance().setConfiguration(CDP_ENCRYPT); - if (DataState.getInstance().getPopulate() == null) { - DataState.getInstance().setPopulate(Boolean.FALSE); - } - dataSetup01(); - } - - @After - public void tearDown() throws Exception { - dataCleanup(DATACLEANUP.LEFT); - } - @AfterClass public static void tearDownClass() throws Exception { - dataCleanup(DATACLEANUP.LEFT); +// dataCleanup(DATACLEANUP.BOTH); } -// public Boolean dataSetup01() { -// if (!DataState.getInstance().isDataCreated("set01")) { -// String nameofCurrMethod = new Throwable() -// .getStackTrace()[0] -// .getMethodName(); -// -// String outputDir = outputDirBase + nameofCurrMethod; -// -// String[] args = new String[]{"-d", "STORAGE_MIGRATION", "-smn", "s3a://something_not_relevant", -// "-pkey", PKEY, -// "-wd", "/hello", "-ewd", "/hello-ext", -// "-db", DataState.getInstance().getWorking_db(), "-o", outputDir, -// "-cfg", DataState.getInstance().getConfiguration()}; -// args = toExecute(args, execArgs, Boolean.TRUE); -// -// List leftSql = new ArrayList(); -// build_use_db(leftSql); -// -// List dataset = null; -// if (DataState.getInstance().getPopulate() == null || DataState.getInstance().getPopulate()) { -// dataset = getDataset(2, 200, null); -// } -// build_n_populate(CREATE_LEGACY_ACID_TBL_N_BUCKETS, TBL_INSERT, dataset, leftSql, new String[]{"acid_01", "2"}); -// if (DataState.getInstance().getPopulate() == null || DataState.getInstance().getPopulate()) { -// dataset = getDataset(2, 400, null); -// } -// build_n_populate(CREATE_LEGACY_ACID_TBL_N_BUCKETS, TBL_INSERT, dataset, leftSql, new String[]{"acid_02", "6"}); -// if (DataState.getInstance().getPopulate() == null || DataState.getInstance().getPopulate()) { -// dataset = getDataset(3, 400, null); -// } -// build_n_populate(CREATE_LEGACY_ACID_TBL_N_BUCKETS_PARTITIONED, TBL_INSERT_PARTITIONED, dataset, leftSql, new String[]{"acid_03", "6"}); -// if (DataState.getInstance().getPopulate() == null || DataState.getInstance().getPopulate()) { -// dataset = getDataset(2, 2000, 500); -// } -// build_n_populate(CREATE_EXTERNAL_TBL_PARTITIONED, TBL_INSERT_PARTITIONED, dataset, leftSql, new String[]{"ext_part_01"}); -// if (DataState.getInstance().getPopulate() == 
null || DataState.getInstance().getPopulate()) { -// dataset = getDataset(2, 2000, null); -// } -// build_n_populate(CREATE_EXTERNAL_TBL, TBL_INSERT, dataset, leftSql, new String[]{"ext_part_02"}); -// -// Mirror cfgMirror = new Mirror(); -// long rtn = cfgMirror.setupSql(args, leftSql, null); -// DataState.getInstance().setDataCreated("set01", Boolean.TRUE); -// } -// return Boolean.TRUE; -// } + @Before + public void setUp() throws Exception { + super.init(CDP_CDP); + dataSetup01(); + } protected static Boolean dataCleanup(DATACLEANUP datacleanup) { if (DataState.getInstance().isCleanUp()) { diff --git a/src/test/java/com/cloudera/utils/hadoop/hms/MirrorTest01.java b/src/test/java/com/cloudera/utils/hadoop/hms/MirrorTest01.java index 77820b6b..2c8fcc2d 100644 --- a/src/test/java/com/cloudera/utils/hadoop/hms/MirrorTest01.java +++ b/src/test/java/com/cloudera/utils/hadoop/hms/MirrorTest01.java @@ -17,12 +17,20 @@ package com.cloudera.utils.hadoop.hms; +import com.cloudera.utils.hadoop.hms.datastrategy.MirrorTestBase; +import org.junit.Before; import org.junit.Test; import static org.junit.Assert.assertTrue; public class MirrorTest01 extends MirrorTestBase { + @Before + public void setUp() throws Exception { + super.init(HDP2_CDP); + dataSetup01(); + } + @Test public void test_spot_test() { // reset(); diff --git a/src/test/java/com/cloudera/utils/hadoop/hms/TestSQL.java b/src/test/java/com/cloudera/utils/hadoop/hms/TestSQL.java index e3905ced..4138c18f 100644 --- a/src/test/java/com/cloudera/utils/hadoop/hms/TestSQL.java +++ b/src/test/java/com/cloudera/utils/hadoop/hms/TestSQL.java @@ -53,7 +53,7 @@ public interface TestSQL { "STORED AS ORC"; // Escape single quotes with another quote when using MessageFormat.format IE: ' to '' . - String CREATE_AVRO_TBL_SHORT = "CREATE TABLE {0} IF NOT EXISTS " + + String CREATE_AVRO_TBL_SHORT = "CREATE TABLE IF NOT EXISTS {0} " + "STORED AS AVRO " + "TBLPROPERTIES (" + "''avro.schema.url''=''{1}'')"; diff --git a/src/test/java/com/cloudera/utils/hadoop/hms/AVROMigrationTest.java b/src/test/java/com/cloudera/utils/hadoop/hms/datastrategy/AVROMigrationTest.java similarity index 87% rename from src/test/java/com/cloudera/utils/hadoop/hms/AVROMigrationTest.java rename to src/test/java/com/cloudera/utils/hadoop/hms/datastrategy/AVROMigrationTest.java index 36303551..a90bccbd 100644 --- a/src/test/java/com/cloudera/utils/hadoop/hms/AVROMigrationTest.java +++ b/src/test/java/com/cloudera/utils/hadoop/hms/datastrategy/AVROMigrationTest.java @@ -15,16 +15,14 @@ * */ -package com.cloudera.utils.hadoop.hms; +package com.cloudera.utils.hadoop.hms.datastrategy; -import com.cloudera.utils.hadoop.hms.mirror.MirrorConf; +import com.cloudera.utils.hadoop.hms.DataState; +import com.cloudera.utils.hadoop.hms.Mirror; import com.cloudera.utils.hadoop.hms.mirror.Pair; -import org.junit.After; -import org.junit.AfterClass; import org.junit.Before; import org.junit.Test; -import java.text.MessageFormat; import java.util.ArrayList; import java.util.List; @@ -33,24 +31,14 @@ import static org.junit.Assert.assertTrue; public class AVROMigrationTest extends MirrorTestBase { - @AfterClass - public static void tearDownClass() throws Exception { - dataCleanup(DATACLEANUP.BOTH); - } @Before - public void setUp() throws Exception { - super.setUp(); - DataState.getInstance().setConfiguration(HDP2_CDP); -// dataSetupAvro(); - } - - @After - public void tearDown() throws Exception { - dataCleanup(DATACLEANUP.RIGHT); + public void init() throws Exception { + super.init(HDP2_CDP); +// 
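The quote-escaping comment above deserves a concrete example, since MessageFormat treats a lone single quote as the start of a quoted literal. Using the corrected template (the table name and schema path here are invented):

    // In a MessageFormat pattern, '' renders as a literal single quote.
    String ddl = MessageFormat.format(
            "CREATE TABLE IF NOT EXISTS {0} STORED AS AVRO TBLPROPERTIES (''avro.schema.url''=''{1}'')",
            "avro_01", "hdfs:///schemas/avro_01.avsc");
    // -> CREATE TABLE IF NOT EXISTS avro_01 STORED AS AVRO
    //    TBLPROPERTIES ('avro.schema.url'='hdfs:///schemas/avro_01.avsc')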
DataState.getInstance().setConfiguration(HDP2_CDP); + dataSetupAvro(); } - public Boolean dataSetupAvro() { if (!DataState.getInstance().isDataCreated("avro")) { String nameofCurrMethod = new Throwable() diff --git a/src/test/java/com/cloudera/utils/hadoop/hms/ConfigValidationTest.java b/src/test/java/com/cloudera/utils/hadoop/hms/datastrategy/ConfigValidationTest.java similarity index 93% rename from src/test/java/com/cloudera/utils/hadoop/hms/ConfigValidationTest.java rename to src/test/java/com/cloudera/utils/hadoop/hms/datastrategy/ConfigValidationTest.java index ce3d6b39..17764cd9 100644 --- a/src/test/java/com/cloudera/utils/hadoop/hms/ConfigValidationTest.java +++ b/src/test/java/com/cloudera/utils/hadoop/hms/datastrategy/ConfigValidationTest.java @@ -15,42 +15,24 @@ * */ -package com.cloudera.utils.hadoop.hms; +package com.cloudera.utils.hadoop.hms.datastrategy; +import com.cloudera.utils.hadoop.hms.DataState; +import com.cloudera.utils.hadoop.hms.Mirror; import com.cloudera.utils.hadoop.hms.mirror.MessageCode; -import com.cloudera.utils.hadoop.hms.mirror.Pair; -import org.junit.After; -import org.junit.AfterClass; import org.junit.Before; import org.junit.Test; -import java.util.ArrayList; -import java.util.List; - -import static com.cloudera.utils.hadoop.hms.TestSQL.*; import static org.junit.Assert.assertTrue; public class ConfigValidationTest extends MirrorTestBase { + @Before - public void setUp() throws Exception { - super.setUp(); - DataState.getInstance().setConfiguration(CDP_CDP); - if (DataState.getInstance().getPopulate() == null) { - DataState.getInstance().setPopulate(Boolean.FALSE); - } + public void init() throws Exception { + super.init(HDP2_CDP); dataSetup01(); } - @After - public void tearDown() throws Exception { - dataCleanup(DATACLEANUP.RIGHT); - } - - @AfterClass - public static void tearDownClass() throws Exception { - dataCleanup(DATACLEANUP.BOTH); - } - @Test public void test_storage_migration_01() { String nameofCurrMethod = new Throwable() @@ -67,9 +49,7 @@ public void test_storage_migration_01() { long rtn = 0; Mirror mirror = new Mirror(); rtn = mirror.go(args); - long check = MessageCode.STORAGE_MIGRATION_REQUIRED_NAMESPACE.getLong(); -// check = check | MessageCode.STORAGE_MIGRATION_REQUIRED_STRATEGY.getLong(); - check = check | MessageCode.STORAGE_MIGRATION_REQUIRED_WAREHOUSE_OPTIONS.getLong(); + long check = MessageCode.STORAGE_MIGRATION_REQUIRED_WAREHOUSE_OPTIONS.getLong(); assertTrue("Return Code Failure: " + rtn + " doesn't match: " + check, rtn == check); } @@ -94,7 +74,7 @@ public void test_storage_migration_w_ma_distcp_01() { Mirror mirror = new Mirror(); rtn = mirror.go(args); long check = MessageCode.STORAGE_MIGRATION_DISTCP_ACID.getLong(); - check = check | MessageCode.STORAGE_MIGRATION_REQUIRED_NAMESPACE.getLong(); +// check = check | MessageCode.STORAGE_MIGRATION_REQUIRED_NAMESPACE.getLong(); check = check | MessageCode.STORAGE_MIGRATION_REQUIRED_WAREHOUSE_OPTIONS.getLong(); assertTrue("Return Code Failure: " + rtn + " doesn't match: " + check, rtn == check); diff --git a/src/test/java/com/cloudera/utils/hadoop/hms/ConfigValidationTest01.java b/src/test/java/com/cloudera/utils/hadoop/hms/datastrategy/ConfigValidationTest01.java similarity index 83% rename from src/test/java/com/cloudera/utils/hadoop/hms/ConfigValidationTest01.java rename to src/test/java/com/cloudera/utils/hadoop/hms/datastrategy/ConfigValidationTest01.java index 416a946a..9df990a5 100644 --- a/src/test/java/com/cloudera/utils/hadoop/hms/ConfigValidationTest01.java +++ 
b/src/test/java/com/cloudera/utils/hadoop/hms/datastrategy/ConfigValidationTest01.java @@ -15,44 +15,28 @@ * */ -package com.cloudera.utils.hadoop.hms; +package com.cloudera.utils.hadoop.hms.datastrategy; +import com.cloudera.utils.hadoop.hms.DataState; +import com.cloudera.utils.hadoop.hms.Mirror; import com.cloudera.utils.hadoop.hms.mirror.MessageCode; -import com.cloudera.utils.hadoop.hms.mirror.Pair; -import org.junit.After; -import org.junit.AfterClass; import org.junit.Before; import org.junit.Test; -import java.util.ArrayList; -import java.util.List; +import java.io.IOException; -import static com.cloudera.utils.hadoop.hms.TestSQL.*; import static org.junit.Assert.assertTrue; public class ConfigValidationTest01 extends MirrorTestBase { + @Before - public void setUp() throws Exception { - super.setUp(); - DataState.getInstance().setConfiguration(HDP2_CDP); - if (DataState.getInstance().getPopulate() == null) { - DataState.getInstance().setPopulate(Boolean.FALSE); - } + public void init() throws Exception { + super.init(HDP2_CDP); dataSetup01(); } - @After - public void tearDown() throws Exception { - dataCleanup(DATACLEANUP.RIGHT); - } - - @AfterClass - public static void tearDownClass() throws Exception { - dataCleanup(DATACLEANUP.BOTH); - } - @Test - public void test_so() { + public void test_so() throws IOException { DataState.getInstance().setConfiguration(HDP2_CDP); String nameofCurrMethod = new Throwable() diff --git a/src/test/java/com/cloudera/utils/hadoop/hms/DistcpMigrationTest.java b/src/test/java/com/cloudera/utils/hadoop/hms/datastrategy/DistcpMigrationTest.java similarity index 91% rename from src/test/java/com/cloudera/utils/hadoop/hms/DistcpMigrationTest.java rename to src/test/java/com/cloudera/utils/hadoop/hms/datastrategy/DistcpMigrationTest.java index 7a4a1130..fc7ea99a 100644 --- a/src/test/java/com/cloudera/utils/hadoop/hms/DistcpMigrationTest.java +++ b/src/test/java/com/cloudera/utils/hadoop/hms/datastrategy/DistcpMigrationTest.java @@ -15,10 +15,10 @@ * */ -package com.cloudera.utils.hadoop.hms; +package com.cloudera.utils.hadoop.hms.datastrategy; -import org.junit.After; -import org.junit.AfterClass; +import com.cloudera.utils.hadoop.hms.DataState; +import com.cloudera.utils.hadoop.hms.Mirror; import org.junit.Before; import org.junit.Test; @@ -26,26 +26,12 @@ public class DistcpMigrationTest extends MirrorTestBase { - @AfterClass - public static void tearDownClass() throws Exception { - dataCleanup(DATACLEANUP.BOTH); - } - @Before - public void setUp() throws Exception { - super.setUp(); - DataState.getInstance().setConfiguration(CDP_CDP); - if (DataState.getInstance().getPopulate() == null) { - DataState.getInstance().setPopulate(Boolean.FALSE); - } + public void init() throws Exception { + super.init(HDP2_CDP); dataSetup01(); } - @After - public void tearDown() throws Exception { - dataCleanup(DATACLEANUP.RIGHT); - } - @Test public void test_so_distcp_leg() { String nameofCurrMethod = new Throwable() diff --git a/src/test/java/com/cloudera/utils/hadoop/hms/DumpTest.java b/src/test/java/com/cloudera/utils/hadoop/hms/datastrategy/DumpTest.java similarity index 73% rename from src/test/java/com/cloudera/utils/hadoop/hms/DumpTest.java rename to src/test/java/com/cloudera/utils/hadoop/hms/datastrategy/DumpTest.java index b62b61ea..379e6bbc 100644 --- a/src/test/java/com/cloudera/utils/hadoop/hms/DumpTest.java +++ b/src/test/java/com/cloudera/utils/hadoop/hms/datastrategy/DumpTest.java @@ -15,36 +15,23 @@ * */ -package com.cloudera.utils.hadoop.hms; +package 
com.cloudera.utils.hadoop.hms.datastrategy; -import org.junit.After; -import org.junit.AfterClass; +import com.cloudera.utils.hadoop.hms.DataState; +import com.cloudera.utils.hadoop.hms.Mirror; import org.junit.Before; import org.junit.Test; import static org.junit.Assert.assertTrue; public class DumpTest extends MirrorTestBase { + @Before - public void setUp() throws Exception { - super.setUp(); - DataState.getInstance().setConfiguration(CDP); - if (DataState.getInstance().getPopulate() == null) { - DataState.getInstance().setPopulate(Boolean.FALSE); - } + public void init() throws Exception { + super.init(HDP2_CDP); dataSetup01(); } - @After - public void tearDown() throws Exception { - dataCleanup(DATACLEANUP.RIGHT); - } - - @AfterClass - public static void tearDownClass() throws Exception { - dataCleanup(DATACLEANUP.BOTH); - } - @Test public void test_dump_01() { String nameofCurrMethod = new Throwable() diff --git a/src/test/java/com/cloudera/utils/hadoop/hms/ExpImpDataMigrationTest.java b/src/test/java/com/cloudera/utils/hadoop/hms/datastrategy/ExpImpDataMigrationTest.java similarity index 92% rename from src/test/java/com/cloudera/utils/hadoop/hms/ExpImpDataMigrationTest.java rename to src/test/java/com/cloudera/utils/hadoop/hms/datastrategy/ExpImpDataMigrationTest.java index a04515c3..ad1abf4a 100644 --- a/src/test/java/com/cloudera/utils/hadoop/hms/ExpImpDataMigrationTest.java +++ b/src/test/java/com/cloudera/utils/hadoop/hms/datastrategy/ExpImpDataMigrationTest.java @@ -15,39 +15,23 @@ * */ -package com.cloudera.utils.hadoop.hms; +package com.cloudera.utils.hadoop.hms.datastrategy; -import com.cloudera.utils.hadoop.hms.mirror.Pair; -import org.junit.After; -import org.junit.AfterClass; +import com.cloudera.utils.hadoop.hms.DataState; +import com.cloudera.utils.hadoop.hms.Mirror; import org.junit.Before; import org.junit.Test; -import java.util.ArrayList; -import java.util.List; - -import static com.cloudera.utils.hadoop.hms.TestSQL.*; import static org.junit.Assert.assertTrue; public class ExpImpDataMigrationTest extends MirrorTestBase { - @AfterClass - public static void tearDownClass() throws Exception { - dataCleanup(DATACLEANUP.BOTH); - } - @Before - public void setUp() throws Exception { - super.setUp(); - DataState.getInstance().setConfiguration(CDP_CDP); + public void init() throws Exception { + super.init(CDP_CDP); dataSetup01(); } - @After - public void tearDown() throws Exception { - dataCleanup(DATACLEANUP.RIGHT); - } - @Test public void test_acid_exp_imp() { String nameofCurrMethod = new Throwable() diff --git a/src/test/java/com/cloudera/utils/hadoop/hms/HybridDataMigrationTest.java b/src/test/java/com/cloudera/utils/hadoop/hms/datastrategy/HybridDataMigrationTest.java similarity index 92% rename from src/test/java/com/cloudera/utils/hadoop/hms/HybridDataMigrationTest.java rename to src/test/java/com/cloudera/utils/hadoop/hms/datastrategy/HybridDataMigrationTest.java index 91d66351..78314bb7 100644 --- a/src/test/java/com/cloudera/utils/hadoop/hms/HybridDataMigrationTest.java +++ b/src/test/java/com/cloudera/utils/hadoop/hms/datastrategy/HybridDataMigrationTest.java @@ -15,39 +15,23 @@ * */ -package com.cloudera.utils.hadoop.hms; +package com.cloudera.utils.hadoop.hms.datastrategy; -import com.cloudera.utils.hadoop.hms.mirror.Pair; -import org.junit.After; -import org.junit.AfterClass; +import com.cloudera.utils.hadoop.hms.DataState; +import com.cloudera.utils.hadoop.hms.Mirror; import org.junit.Before; import org.junit.Test; -import java.util.ArrayList; -import 
java.util.List; - -import static com.cloudera.utils.hadoop.hms.TestSQL.*; import static org.junit.Assert.assertTrue; public class HybridDataMigrationTest extends MirrorTestBase { - @AfterClass - public static void tearDownClass() throws Exception { - dataCleanup(DATACLEANUP.BOTH); - } - @Before - public void setUp() throws Exception { - super.setUp(); - DataState.getInstance().setConfiguration(CDP_CDP); + public void init() throws Exception { + super.init(CDP_CDP); dataSetup01(); } - @After - public void tearDown() throws Exception { - dataCleanup(DATACLEANUP.RIGHT); - } - @Test public void test_acid_hybrid_da_cs_all() { String nameofCurrMethod = new Throwable() diff --git a/src/test/java/com/cloudera/utils/hadoop/hms/LegacyConfigValidationTest.java b/src/test/java/com/cloudera/utils/hadoop/hms/datastrategy/LegacyConfigValidationTest.java similarity index 52% rename from src/test/java/com/cloudera/utils/hadoop/hms/LegacyConfigValidationTest.java rename to src/test/java/com/cloudera/utils/hadoop/hms/datastrategy/LegacyConfigValidationTest.java index 45b0b997..5e8dabda 100644 --- a/src/test/java/com/cloudera/utils/hadoop/hms/LegacyConfigValidationTest.java +++ b/src/test/java/com/cloudera/utils/hadoop/hms/datastrategy/LegacyConfigValidationTest.java @@ -15,152 +15,24 @@ * */ -package com.cloudera.utils.hadoop.hms; +package com.cloudera.utils.hadoop.hms.datastrategy; +import com.cloudera.utils.hadoop.hms.DataState; +import com.cloudera.utils.hadoop.hms.Mirror; import com.cloudera.utils.hadoop.hms.mirror.MessageCode; -import com.cloudera.utils.hadoop.hms.mirror.Pair; -import org.junit.After; -import org.junit.AfterClass; import org.junit.Before; import org.junit.Test; -import java.util.ArrayList; -import java.util.List; - -import static com.cloudera.utils.hadoop.hms.TestSQL.*; import static org.junit.Assert.assertTrue; public class LegacyConfigValidationTest extends MirrorTestBase { - @AfterClass - public static void tearDownClass() throws Exception { - dataCleanup(DATACLEANUP.BOTH); - } - @Before - public void setUp() throws Exception { - super.setUp(); - DataState.getInstance().setConfiguration(HDP2_CDP); + public void init() throws Exception { + super.init(HDP2_CDP); dataSetup01(); } - @After - public void tearDown() throws Exception { - dataCleanup(DATACLEANUP.RIGHT); - } - -// `-r` Feature removed for now.. -// @Test -// public void test_acid_hybrid_da_cs_r_all_leg() { -// String nameofCurrMethod = new Throwable() -// .getStackTrace()[0] -// .getMethodName(); -// -// String outputDir = outputDirBase + nameofCurrMethod; -// -// String[] args = new String[]{"-d", "HYBRID", "-db", DataState.getInstance().getWorking_db(), -// "-ma", "-da", "-r", "-cs", common_storage, -// "-o", outputDir, "-cfg", DataState.getInstance().getConfiguration()}; -// args = toExecute(args, execArgs, Boolean.FALSE); -// -// long rtn = 0; -// Mirror mirror = new Mirror(); -// rtn = mirror.go(args); -// long check = MessageCode.REPLACE_ONLY_WITH_SQL.getLong(); -// -// assertTrue("Return Code Failure: " + rtn + " expecting: " + check, rtn == check); -// } - -// `-r` Feature removed for now.. 
-// @Test -// public void test_acid_hybrid_da_cs_r_leg() { -// String nameofCurrMethod = new Throwable() -// .getStackTrace()[0] -// .getMethodName(); -// -// String outputDir = outputDirBase + nameofCurrMethod; -// -// String[] args = new String[]{"-d", "HYBRID", "-db", DataState.getInstance().getWorking_db(), -// "-mao", "-da", "-r", "-cs", common_storage, -// "-o", outputDir, "-cfg", DataState.getInstance().getConfiguration()}; -// args = toExecute(args, execArgs, Boolean.FALSE); -// -// long rtn = 0; -// Mirror mirror = new Mirror(); -// rtn = mirror.go(args); -// -// long check = MessageCode.REPLACE_ONLY_WITH_SQL.getLong(); -// -// assertTrue("Return Code Failure: " + rtn + " expecting: " + check, rtn == check); -// } - -// `-r` Feature removed for now.. -// @Test -// public void test_acid_hybrid_da_r_leg() { -// String nameofCurrMethod = new Throwable() -// .getStackTrace()[0] -// .getMethodName(); -// -// String outputDir = outputDirBase + nameofCurrMethod; -// -// String[] args = new String[]{"-d", "HYBRID", "-db", DataState.getInstance().getWorking_db(), -// "-mao", "-da", "-r", -// "-o", outputDir, "-cfg", DataState.getInstance().getConfiguration()}; -// args = toExecute(args, execArgs, Boolean.FALSE); -// -// long rtn = 0; -// Mirror mirror = new Mirror(); -// rtn = mirror.go(args); -// -// long check = MessageCode.REPLACE_ONLY_WITH_SQL.getLong(); -// -// assertTrue("Return Code Failure: " + rtn + " expecting: " + check, rtn == check); -// } - -// `-r` Feature removed for now.. -// @Test -// public void test_acid_hybrid_r_leg() { -// String nameofCurrMethod = new Throwable() -// .getStackTrace()[0] -// .getMethodName(); -// -// String outputDir = outputDirBase + nameofCurrMethod; -// -// String[] args = new String[]{"-d", "HYBRID", "-db", DataState.getInstance().getWorking_db(), -// "-mao", "-r", -// "-o", outputDir, "-cfg", DataState.getInstance().getConfiguration()}; -// args = toExecute(args, execArgs, Boolean.FALSE); -// -// long rtn = 0; -// Mirror mirror = new Mirror(); -// rtn = mirror.go(args); -// -// long check = MessageCode.REPLACE_ONLY_WITH_SQL.getLong(); -// check = check | MessageCode.REPLACE_ONLY_WITH_DA.getLong(); -// assertTrue("Return Code Failure: " + rtn + " expecting: " + check, rtn == check); -// } - -// `-r` Feature removed for now.. 
-// @Test -// public void test_acid_sql_r_leg() { -// String nameofCurrMethod = new Throwable() -// .getStackTrace()[0] -// .getMethodName(); -// -// String outputDir = outputDirBase + nameofCurrMethod; -// -// String[] args = new String[]{"-d", "SQL", "-db", DataState.getInstance().getWorking_db(), -// "-mao", "-r", -// "-o", outputDir, "-cfg", DataState.getInstance().getConfiguration()}; -// args = toExecute(args, execArgs, Boolean.FALSE); -// -// long rtn = 0; -// Mirror mirror = new Mirror(); -// rtn = mirror.go(args); -// long check = MessageCode.REPLACE_ONLY_WITH_DA.getLong(); -// assertTrue("Return Code Failure: " + rtn + " expecting: " + check, rtn == check); -// } - @Test public void test_so_ro_leg() { String nameofCurrMethod = new Throwable() diff --git a/src/test/java/com/cloudera/utils/hadoop/hms/LegacyDistcpMigrationTest.java b/src/test/java/com/cloudera/utils/hadoop/hms/datastrategy/LegacyDistcpMigrationTest.java similarity index 91% rename from src/test/java/com/cloudera/utils/hadoop/hms/LegacyDistcpMigrationTest.java rename to src/test/java/com/cloudera/utils/hadoop/hms/datastrategy/LegacyDistcpMigrationTest.java index 70ef54ff..c0b8d12b 100644 --- a/src/test/java/com/cloudera/utils/hadoop/hms/LegacyDistcpMigrationTest.java +++ b/src/test/java/com/cloudera/utils/hadoop/hms/datastrategy/LegacyDistcpMigrationTest.java @@ -15,11 +15,10 @@ * */ -package com.cloudera.utils.hadoop.hms; +package com.cloudera.utils.hadoop.hms.datastrategy; -import com.cloudera.utils.hadoop.hms.mirror.MessageCode; -import org.junit.After; -import org.junit.AfterClass; +import com.cloudera.utils.hadoop.hms.DataState; +import com.cloudera.utils.hadoop.hms.Mirror; import org.junit.Before; import org.junit.Test; @@ -27,26 +26,12 @@ public class LegacyDistcpMigrationTest extends MirrorTestBase { - @AfterClass - public static void tearDownClass() throws Exception { - dataCleanup(DATACLEANUP.BOTH); - } - @Before - public void setUp() throws Exception { - super.setUp(); - DataState.getInstance().setConfiguration(HDP2_CDP); - if (DataState.getInstance().getPopulate() == null) { - DataState.getInstance().setPopulate(Boolean.FALSE); - } + public void init() throws Exception { + super.init(HDP2_CDP); dataSetup01(); } - @After - public void tearDown() throws Exception { - dataCleanup(DATACLEANUP.RIGHT); - } - @Test public void test_so_distcp_leg() { String nameofCurrMethod = new Throwable() diff --git a/src/test/java/com/cloudera/utils/hadoop/hms/LegacyExpImpDataMigrationTest.java b/src/test/java/com/cloudera/utils/hadoop/hms/datastrategy/LegacyExpImpDataMigrationTest.java similarity index 88% rename from src/test/java/com/cloudera/utils/hadoop/hms/LegacyExpImpDataMigrationTest.java rename to src/test/java/com/cloudera/utils/hadoop/hms/datastrategy/LegacyExpImpDataMigrationTest.java index 9c9b0b97..0df83702 100644 --- a/src/test/java/com/cloudera/utils/hadoop/hms/LegacyExpImpDataMigrationTest.java +++ b/src/test/java/com/cloudera/utils/hadoop/hms/datastrategy/LegacyExpImpDataMigrationTest.java @@ -15,39 +15,23 @@ * */ -package com.cloudera.utils.hadoop.hms; +package com.cloudera.utils.hadoop.hms.datastrategy; -import com.cloudera.utils.hadoop.hms.mirror.Pair; -import org.junit.After; -import org.junit.AfterClass; +import com.cloudera.utils.hadoop.hms.DataState; +import com.cloudera.utils.hadoop.hms.Mirror; import org.junit.Before; import org.junit.Test; -import java.util.ArrayList; -import java.util.List; - -import static com.cloudera.utils.hadoop.hms.TestSQL.*; import static org.junit.Assert.assertTrue; public 
 class LegacyExpImpDataMigrationTest extends MirrorTestBase {

-    @AfterClass
-    public static void tearDownClass() throws Exception {
-        dataCleanup(DATACLEANUP.BOTH);
-    }
-
     @Before
-    public void setUp() throws Exception {
-        super.setUp();
-        DataState.getInstance().setConfiguration(HDP2_CDP);
+    public void init() throws Exception {
+        super.init(HDP2_CDP);
         dataSetup01();
     }

-    @After
-    public void tearDown() throws Exception {
-        dataCleanup(DATACLEANUP.RIGHT);
-    }
-
     @Test
     public void test_acid_exp_imp() {
         String nameofCurrMethod = new Throwable()
diff --git a/src/test/java/com/cloudera/utils/hadoop/hms/LegacyHybridDataMigrationTest.java b/src/test/java/com/cloudera/utils/hadoop/hms/datastrategy/LegacyHybridDataMigrationTest.java
similarity index 93%
rename from src/test/java/com/cloudera/utils/hadoop/hms/LegacyHybridDataMigrationTest.java
rename to src/test/java/com/cloudera/utils/hadoop/hms/datastrategy/LegacyHybridDataMigrationTest.java
index 26234a89..30784e1e 100644
--- a/src/test/java/com/cloudera/utils/hadoop/hms/LegacyHybridDataMigrationTest.java
+++ b/src/test/java/com/cloudera/utils/hadoop/hms/datastrategy/LegacyHybridDataMigrationTest.java
@@ -15,39 +15,23 @@
  *
  */

-package com.cloudera.utils.hadoop.hms;
+package com.cloudera.utils.hadoop.hms.datastrategy;

-import com.cloudera.utils.hadoop.hms.mirror.Pair;
-import org.junit.After;
-import org.junit.AfterClass;
+import com.cloudera.utils.hadoop.hms.DataState;
+import com.cloudera.utils.hadoop.hms.Mirror;
 import org.junit.Before;
 import org.junit.Test;

-import java.util.ArrayList;
-import java.util.List;
-
-import static com.cloudera.utils.hadoop.hms.TestSQL.*;
 import static org.junit.Assert.assertTrue;

 public class LegacyHybridDataMigrationTest extends MirrorTestBase {

-    @AfterClass
-    public static void tearDownClass() throws Exception {
-        dataCleanup(DATACLEANUP.BOTH);
-    }
-
     @Before
-    public void setUp() throws Exception {
-        super.setUp();
-        DataState.getInstance().setConfiguration(HDP2_CDP);
+    public void init() throws Exception {
+        super.init(HDP2_CDP);
         dataSetup01();
     }

-    @After
-    public void tearDown() throws Exception {
-        dataCleanup(DATACLEANUP.RIGHT);
-    }
-
     @Test
     public void test_acid_hybrid_da_cs_all_leg() {
         String nameofCurrMethod = new Throwable()
diff --git a/src/test/java/com/cloudera/utils/hadoop/hms/LegacySQLDataMigrationTest.java b/src/test/java/com/cloudera/utils/hadoop/hms/datastrategy/LegacySQLDataMigrationTest.java
similarity index 86%
rename from src/test/java/com/cloudera/utils/hadoop/hms/LegacySQLDataMigrationTest.java
rename to src/test/java/com/cloudera/utils/hadoop/hms/datastrategy/LegacySQLDataMigrationTest.java
index c70e30ca..ae413c5b 100644
--- a/src/test/java/com/cloudera/utils/hadoop/hms/LegacySQLDataMigrationTest.java
+++ b/src/test/java/com/cloudera/utils/hadoop/hms/datastrategy/LegacySQLDataMigrationTest.java
@@ -15,61 +15,23 @@
  *
  */

-package com.cloudera.utils.hadoop.hms;
+package com.cloudera.utils.hadoop.hms.datastrategy;

-import com.cloudera.utils.hadoop.hms.mirror.MessageCode;
-import com.cloudera.utils.hadoop.hms.mirror.Pair;
-import org.junit.After;
-import org.junit.AfterClass;
+import com.cloudera.utils.hadoop.hms.DataState;
+import com.cloudera.utils.hadoop.hms.Mirror;
 import org.junit.Before;
 import org.junit.Test;

-import java.util.ArrayList;
-import java.util.List;
-
-import static com.cloudera.utils.hadoop.hms.TestSQL.*;
 import static org.junit.Assert.assertTrue;

 public class LegacySQLDataMigrationTest extends MirrorTestBase {

-    @AfterClass
-    public static void tearDownClass() throws Exception {
-        dataCleanup(DATACLEANUP.BOTH);
-    }
-
     @Before
-    public void setUp() throws Exception {
-        super.setUp();
-        DataState.getInstance().setConfiguration(HDP2_CDP);
+    public void init() throws Exception {
+        super.init(HDP2_CDP);
         dataSetup01();
     }

-    @After
-    public void tearDown() throws Exception {
-        dataCleanup(DATACLEANUP.RIGHT);
-    }
-
-// `-r` Feature removed for now..
-//    @Test
-//    public void test_acid_sql_da_cs_r_all_leg() {
-//        String nameofCurrMethod = new Throwable()
-//                .getStackTrace()[0]
-//                .getMethodName();
-//
-//        String outputDir = outputDirBase + nameofCurrMethod;
-//
-//        String[] args = new String[]{"-d", "SQL", "-db", DataState.getInstance().getWorking_db(),
-//                "-ma", "-da", "-r", "-cs", common_storage,
-//                "-o", outputDir, "-cfg", DataState.getInstance().getConfiguration()};
-//        args = toExecute(args, execArgs, Boolean.FALSE);
-//
-//        long rtn = 0;
-//        Mirror mirror = new Mirror();
-//        rtn = mirror.go(args);
-//        int check = 0;
-//        assertTrue("Return Code Failure: " + rtn + " doesn't match: " + check, rtn == check);
-//    }
-
     @Test
     public void test_acid_sql_da_leg() {
         String nameofCurrMethod = new Throwable()
diff --git a/src/test/java/com/cloudera/utils/hadoop/hms/LegacySchemaMigrationTest.java b/src/test/java/com/cloudera/utils/hadoop/hms/datastrategy/LegacySchemaMigrationTest.java
similarity index 94%
rename from src/test/java/com/cloudera/utils/hadoop/hms/LegacySchemaMigrationTest.java
rename to src/test/java/com/cloudera/utils/hadoop/hms/datastrategy/LegacySchemaMigrationTest.java
index f121a8c9..614712af 100644
--- a/src/test/java/com/cloudera/utils/hadoop/hms/LegacySchemaMigrationTest.java
+++ b/src/test/java/com/cloudera/utils/hadoop/hms/datastrategy/LegacySchemaMigrationTest.java
@@ -15,43 +15,24 @@
  *
  */

-package com.cloudera.utils.hadoop.hms;
+package com.cloudera.utils.hadoop.hms.datastrategy;

+import com.cloudera.utils.hadoop.hms.DataState;
+import com.cloudera.utils.hadoop.hms.Mirror;
 import com.cloudera.utils.hadoop.hms.mirror.MessageCode;
-import com.cloudera.utils.hadoop.hms.mirror.Pair;
-import org.junit.After;
-import org.junit.AfterClass;
 import org.junit.Before;
 import org.junit.Test;

-import java.util.ArrayList;
-import java.util.List;
-
-import static com.cloudera.utils.hadoop.hms.TestSQL.*;
 import static org.junit.Assert.assertTrue;

 public class LegacySchemaMigrationTest extends MirrorTestBase {

-    @AfterClass
-    public static void tearDownClass() throws Exception {
-        dataCleanup(DATACLEANUP.BOTH);
-    }
-
     @Before
-    public void setUp() throws Exception {
-        super.setUp();
-        DataState.getInstance().setConfiguration(HDP2_CDP);
-        if (DataState.getInstance().getPopulate() == null) {
-            DataState.getInstance().setPopulate(Boolean.FALSE);
-        }
+    public void init() throws Exception {
+        super.init(HDP2_CDP);
         dataSetup01();
     }

-    @After
-    public void tearDown() throws Exception {
-        dataCleanup(DATACLEANUP.RIGHT);
-    }
-
     @Test
     public void test_acid_b_leg() {
         String nameofCurrMethod = new Throwable()
diff --git a/src/test/java/com/cloudera/utils/hadoop/hms/MirrorTestBase.java b/src/test/java/com/cloudera/utils/hadoop/hms/datastrategy/MirrorTestBase.java
similarity index 88%
rename from src/test/java/com/cloudera/utils/hadoop/hms/MirrorTestBase.java
rename to src/test/java/com/cloudera/utils/hadoop/hms/datastrategy/MirrorTestBase.java
index fec1ad3b..fac69961 100644
--- a/src/test/java/com/cloudera/utils/hadoop/hms/MirrorTestBase.java
+++ b/src/test/java/com/cloudera/utils/hadoop/hms/datastrategy/MirrorTestBase.java
@@ -15,20 +15,25 @@
  *
  */

-package com.cloudera.utils.hadoop.hms;
+package com.cloudera.utils.hadoop.hms.datastrategy;

+import com.cloudera.utils.hadoop.hms.Context;
+import com.cloudera.utils.hadoop.hms.DataState;
+import com.cloudera.utils.hadoop.hms.Mirror;
+import com.cloudera.utils.hadoop.hms.mirror.Config;
 import com.cloudera.utils.hadoop.hms.mirror.Environment;
 import com.cloudera.utils.hadoop.hms.mirror.MirrorConf;
 import com.cloudera.utils.hadoop.hms.mirror.Pair;
 import org.apache.commons.lang3.RandomStringUtils;
 import org.apache.commons.lang3.RandomUtils;
+import org.junit.After;
+import org.junit.AfterClass;
 import org.junit.Before;

 import java.text.MessageFormat;
 import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.List;
-import java.util.Locale;

 import static com.cloudera.utils.hadoop.hms.TestSQL.*;
 import static com.cloudera.utils.hadoop.hms.TestSQL.TBL_INSERT;
@@ -52,6 +57,20 @@ public class MirrorTestBase {

     private final String fieldCharacters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

+    @AfterClass
+    public static void tearDownClass() throws Exception {
+//        dataCleanup(DATACLEANUP.BOTH);
+    }
+
+    public void init(String cfg) throws Exception {
+        setUp(cfg);
+    }
+
+    @After
+    public void tearDown() throws Exception {
+        dataCleanup(DATACLEANUP.RIGHT);
+    }
+
     protected static String[] toExecute(String[] one, String[] two, boolean forceExecute) {
         String[] rtn = one;
         if (DataState.getInstance().isExecute() || forceExecute) {
@@ -106,8 +125,8 @@ public Boolean dataSetup01() {
         if (DataState.getInstance().getPopulate() == null || DataState.getInstance().getPopulate()) {
             dataset = getDataset(2, 2000, null);
         }
-        build_n_populate(CREATE_EXTERNAL_TBL, TBL_INSERT, dataset, leftSql,new String[]{"ext_part_02"});
-        build_n_populate(CREATE_LEGACY_MNGD_TBL, TBL_INSERT, dataset, leftSql,new String[]{"legacy_mngd_01"});
+        build_n_populate(CREATE_EXTERNAL_TBL, TBL_INSERT, dataset, leftSql, new String[]{"ext_part_02"});
+        build_n_populate(CREATE_LEGACY_MNGD_TBL, TBL_INSERT, dataset, leftSql, new String[]{"legacy_mngd_01"});

         Mirror cfgMirror = new Mirror();
         long rtn = cfgMirror.setupSqlLeft(args, leftSql);
@@ -121,7 +140,9 @@ public enum DATACLEANUP {
     }

     protected static Boolean dataCleanup(DATACLEANUP datacleanup) {
-        if (DataState.getInstance().isCleanUp()) {
+        // Only Cleanup Data if Clean and Execute Flags are set to prevent downstream issues with test
+        // dataset.
+        if (DataState.getInstance().isCleanUp() && DataState.getInstance().isExecute()) {
             String nameofCurrMethod = new Throwable()
                     .getStackTrace()[0]
                     .getMethodName();
@@ -147,14 +168,22 @@ protected static Boolean dataCleanup(DATACLEANUP datacleanup) {

         Mirror cfgMirror = new Mirror();
         long rtn = 0l;
+        Config cfg = Context.getInstance().getConfig();
+        String ns = null;
         switch (datacleanup) {
             case LEFT:
+                ns = cfg.getCluster(Environment.LEFT).getHcfsNamespace();
                 rtn = cfgMirror.setupSqlLeft(args, leftSql);
                 break;
             case RIGHT:
+                ns = cfg.getCluster(Environment.RIGHT).getHcfsNamespace();
+                // Need to figure out which dataset to reset.
+//                DataState.getInstance().setDataCreated();
                 rtn = cfgMirror.setupSqlRight(args, rightSql);
                 break;
             case BOTH:
+                String lns = cfg.getCluster(Environment.LEFT).getHcfsNamespace();
+                String rns = cfg.getCluster(Environment.RIGHT).getHcfsNamespace();
                 rtn = cfgMirror.setupSql(args, leftSql, rightSql);
                 break;
         }
@@ -166,7 +195,7 @@ protected void build_n_populate(String tableDefTemplate, String insertTemplate,
                                     List dataset, List targetPairList, Object[] opts) {
         MessageFormat mf = new MessageFormat("US");
         String tableCreate = MessageFormat.format(tableDefTemplate, opts);
-        String tableName = (String)opts[0];
+        String tableName = (String) opts[0];
         Pair createPair = new Pair("Create table: " + tableName, tableCreate);
         targetPairList.add(createPair);
         if (dataset != null) {
@@ -196,13 +225,10 @@ protected void build_n_populate(String tableDefTemplate, String insertTemplate,
     }

     protected void build_use_db(List sqlPairList) {
-        String dropDb = MessageFormat.format(MirrorConf.DROP_DB, DataState.getInstance().getWorking_db());
-        Pair r01p = new Pair("DROP DB: " + DataState.getInstance().getWorking_db(), dropDb);
         String createDb = MessageFormat.format(MirrorConf.CREATE_DB, DataState.getInstance().getWorking_db());
         Pair r02p = new Pair("CREATE DB: " + DataState.getInstance().getWorking_db(), createDb);
         String useDb = MessageFormat.format(MirrorConf.USE, DataState.getInstance().getWorking_db());
         Pair r03p = new Pair("Use DB: " + DataState.getInstance().getWorking_db(), useDb);
-        sqlPairList.add(r01p);
         sqlPairList.add(r02p);
         sqlPairList.add(r03p);
     }
@@ -230,8 +256,9 @@ record = new String[realWidth];
         return rtn;
     }

-    @Before
-    public void setUp() throws Exception {
+    public void setUp(String configLocation) throws Exception {
+        DataState.getInstance().setConfiguration(configLocation);
+        // Override default db for test run.
String lclWorkingDb = System.getenv("DB"); if (lclWorkingDb != null) { diff --git a/src/test/java/com/cloudera/utils/hadoop/hms/SQLDataMigrationTest.java b/src/test/java/com/cloudera/utils/hadoop/hms/datastrategy/SQLDataMigrationTest.java similarity index 94% rename from src/test/java/com/cloudera/utils/hadoop/hms/SQLDataMigrationTest.java rename to src/test/java/com/cloudera/utils/hadoop/hms/datastrategy/SQLDataMigrationTest.java index f1ffbd56..12e5e726 100644 --- a/src/test/java/com/cloudera/utils/hadoop/hms/SQLDataMigrationTest.java +++ b/src/test/java/com/cloudera/utils/hadoop/hms/datastrategy/SQLDataMigrationTest.java @@ -15,39 +15,23 @@ * */ -package com.cloudera.utils.hadoop.hms; +package com.cloudera.utils.hadoop.hms.datastrategy; -import com.cloudera.utils.hadoop.hms.mirror.Pair; -import org.junit.After; -import org.junit.AfterClass; +import com.cloudera.utils.hadoop.hms.DataState; +import com.cloudera.utils.hadoop.hms.Mirror; import org.junit.Before; import org.junit.Test; -import java.util.ArrayList; -import java.util.List; - -import static com.cloudera.utils.hadoop.hms.TestSQL.*; import static org.junit.Assert.assertTrue; public class SQLDataMigrationTest extends MirrorTestBase { - @AfterClass - public static void tearDownClass() throws Exception { - dataCleanup(DATACLEANUP.BOTH); - } - @Before - public void setUp() throws Exception { - super.setUp(); - DataState.getInstance().setConfiguration(CDP_CDP); + public void init() throws Exception { + super.init(CDP_CDP); dataSetup01(); } - @After - public void tearDown() throws Exception { - dataCleanup(DATACLEANUP.RIGHT); - } - @Test public void test_acid_sql() { String nameofCurrMethod = new Throwable() diff --git a/src/test/java/com/cloudera/utils/hadoop/hms/SchemaMigrationTest.java b/src/test/java/com/cloudera/utils/hadoop/hms/datastrategy/SchemaMigrationTest.java similarity index 95% rename from src/test/java/com/cloudera/utils/hadoop/hms/SchemaMigrationTest.java rename to src/test/java/com/cloudera/utils/hadoop/hms/datastrategy/SchemaMigrationTest.java index 6fc48aef..1741cce9 100644 --- a/src/test/java/com/cloudera/utils/hadoop/hms/SchemaMigrationTest.java +++ b/src/test/java/com/cloudera/utils/hadoop/hms/datastrategy/SchemaMigrationTest.java @@ -15,11 +15,11 @@ * */ -package com.cloudera.utils.hadoop.hms; +package com.cloudera.utils.hadoop.hms.datastrategy; +import com.cloudera.utils.hadoop.hms.DataState; +import com.cloudera.utils.hadoop.hms.Mirror; import com.cloudera.utils.hadoop.hms.mirror.MessageCode; -import org.junit.After; -import org.junit.AfterClass; import org.junit.Before; import org.junit.Test; @@ -27,26 +27,12 @@ public class SchemaMigrationTest extends MirrorTestBase { - @AfterClass - public static void tearDownClass() throws Exception { - dataCleanup(DATACLEANUP.BOTH); - } - @Before - public void setUp() throws Exception { - super.setUp(); - DataState.getInstance().setConfiguration(CDP_CDP); - if (DataState.getInstance().getPopulate() == null) { - DataState.getInstance().setPopulate(Boolean.FALSE); - } + public void init() throws Exception { + super.init(HDP2_CDP); dataSetup01(); } - @After - public void tearDown() throws Exception { - dataCleanup(DATACLEANUP.RIGHT); - } - @Test public void test_acid_b_leg() { String nameofCurrMethod = new Throwable() diff --git a/src/test/java/com/cloudera/utils/hadoop/hms/StorageMigrationTest.java b/src/test/java/com/cloudera/utils/hadoop/hms/datastrategy/StorageMigrationTest.java similarity index 91% rename from 
src/test/java/com/cloudera/utils/hadoop/hms/StorageMigrationTest.java rename to src/test/java/com/cloudera/utils/hadoop/hms/datastrategy/StorageMigrationTest.java index 9bb074a1..de4c92ef 100644 --- a/src/test/java/com/cloudera/utils/hadoop/hms/StorageMigrationTest.java +++ b/src/test/java/com/cloudera/utils/hadoop/hms/datastrategy/StorageMigrationTest.java @@ -15,39 +15,23 @@ * */ -package com.cloudera.utils.hadoop.hms; +package com.cloudera.utils.hadoop.hms.datastrategy; -import com.cloudera.utils.hadoop.hms.mirror.Pair; -import org.junit.After; -import org.junit.AfterClass; +import com.cloudera.utils.hadoop.hms.DataState; +import com.cloudera.utils.hadoop.hms.Mirror; import org.junit.Before; import org.junit.Test; -import java.util.ArrayList; -import java.util.List; - -import static com.cloudera.utils.hadoop.hms.TestSQL.*; import static org.junit.Assert.assertTrue; public class StorageMigrationTest extends MirrorTestBase { - @AfterClass - public static void tearDownClass() throws Exception { - dataCleanup(DATACLEANUP.BOTH); - } - @Before - public void setUp() throws Exception { - super.setUp(); - DataState.getInstance().setConfiguration(CDP); + public void init() throws Exception { + super.init(HDP2_CDP); dataSetup01(); } - @After - public void tearDown() throws Exception { - dataCleanup(DATACLEANUP.BOTH); - } - @Test public void test_datasetup() { System.out.println("Data setup."); diff --git a/src/test/java/com/cloudera/utils/hadoop/hms/mirror/feature/StructEscapeFieldsFeatureTest.java b/src/test/java/com/cloudera/utils/hadoop/hms/mirror/feature/StructEscapeFieldsFeatureTest.java index e290a542..e67ae58d 100644 --- a/src/test/java/com/cloudera/utils/hadoop/hms/mirror/feature/StructEscapeFieldsFeatureTest.java +++ b/src/test/java/com/cloudera/utils/hadoop/hms/mirror/feature/StructEscapeFieldsFeatureTest.java @@ -97,6 +97,98 @@ public class StructEscapeFieldsFeatureTest extends BaseFeatureTest { " ) " }; + public static String[] schema_04 = new String[]{ + " CREATE EXTERNAL TABLE `my_error`( ", + " `weird_item_id` string, ", + " `weird_id` string, ", + " `member_id` string, ", + " `group_tub_id` string, ", + " `user_assoc_id` string, ", + " `person_nbr` string, ", + " `carrier_tub_id` string, ", + " `weird_reason_cde` string, ", + " `weird_action_cde` string, ", + " `weird_disposition_cde` string, ", + " `weird_resource_id` string, ", + " `start_tms` timestamp, ", + " `end_tms` timestamp, ", + " `start_timezone_txt` string, ", + " `end_timezone_txt` string, ", + " `start_tms_txt` string, ", + " `end_tms_txt` string, ", + " `attendant_agent_id` string, ", + " `weird_direction_dsc` string, ", + " `originating_system_dsc` string, ", + " `weird_intent_dsc` string, ", + " `outcome_dsc` string, ", + " `electronic_mail_address_txt` string, ", + " `city_nme` string, ", + " `state_or_province_cde` string, ", + " `postal_cde` string, ", + " `web_address_txt` string, ", + " `domain_nme` string, ", + " `application_uri_txt` string, ", + " `lob_dsc` string, ", + " `business_segment_dsc` string, ", + " `comment_txt` string, ", + " `street_address_lines_txt` string, ", + " `business_products_dsc` string, ", + " `primary_reason_ind` string, ", + " `primary_aspect_dude_contact_file_ind` string, ", + " `worksite_id` string, ", + " `order_id` string, ", + " `weird_survey_result_id` string, ", + " `weird_survey_id` string, ", + " `phone_number` array>, ", + " `weird_logging_system` array>, ", + " `correlation_special` array>, ", + " `weird_property` array>, ", + " `subject_reference_store_key` array>, ", + 
" `related_reference_store_key` array>, ", + " `weird_channel` array>, ", + " `pond_last_update_tms` timestamp) ", + " ROW FORMAT SERDE ", + " 'org.apache.hadoop.hive.ql.io.orc.OrcSerde' ", + " STORED AS INPUTFORMAT ", + " 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' ", + " OUTPUTFORMAT ", + " 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat' ", + " LOCATION ", + " 'hdfs://MYCLUSTER/big_area.db/my_error' ", + " TBLPROPERTIES ( ", + " 'bucketing_version'='2', ", + " 'discover.partitions'='true', ", + " 'transient_lastDdlTime'='1684275534') " + }; + + public static String[] schema_05 = new String[]{ + " CREATE EXTERNAL TABLE `my_test`( ", + " `tell_me_period_end_dte` date, ", + " `tell_mehum_type_cde` string, ", + " `patient_agn_id` int, ", + " `adherence_condition_txt` string, ", + " `tenant_id` string, ", + " `year_nbr` int, ", + " `primary_channel_type_txt` string, ", + " `measure_type_cde` string, ", + " `index_dte` date, ", + " `from_where_begin_dte` date, ", + " `from_where_end_dte` string, ", + " `from_where_cde` string, ", + " `pond_tms` timestamp, ", + " `pdc_patient_condition_report` array>) ", + " ROW FORMAT SERDE ", + " 'org.apache.hadoop.hive.ql.io.orc.OrcSerde' ", + " STORED AS INPUTFORMAT ", + " 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' ", + " OUTPUTFORMAT ", + " 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat' ", + " LOCATION ", + " 'hdfs://MYCLUSTER/test.db/my_test' ", + " TBLPROPERTIES ( ", + " 'bucketing_version'='2', ", + " 'discover.partitions'='true', ", + " 'transient_lastDdlTime'='1684275550') "}; private final String ESCAPE = "`"; @@ -149,4 +241,20 @@ public void test_012() { assertTrue(check); } + @Test + public void test_013() { + List schema = toList(schema_04); + Boolean check = feature.fixSchema(schema); + schema.stream().forEach(System.out::println); + assertTrue(check); + } + + @Test + public void test_014() { + List schema = toList(schema_05); + Boolean check = feature.fixSchema(schema); + schema.stream().forEach(System.out::println); + assertTrue(check); + } + } \ No newline at end of file diff --git a/src/test/java/com/cloudera/utils/hadoop/hms/util/TableUtilsTest.java b/src/test/java/com/cloudera/utils/hadoop/hms/util/TableUtilsTest.java index 52843337..5d04c321 100644 --- a/src/test/java/com/cloudera/utils/hadoop/hms/util/TableUtilsTest.java +++ b/src/test/java/com/cloudera/utils/hadoop/hms/util/TableUtilsTest.java @@ -17,6 +17,7 @@ package com.cloudera.utils.hadoop.hms.util; +import com.cloudera.utils.hadoop.hms.mirror.EnvironmentTable; import com.cloudera.utils.hadoop.hms.mirror.MirrorConf; import org.junit.Before; import org.junit.Test; @@ -25,6 +26,7 @@ import java.util.Arrays; import java.util.List; +import static com.cloudera.utils.hadoop.hms.mirror.TablePropertyVars.TRANSACTIONAL; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; @@ -35,13 +37,16 @@ public class TableUtilsTest { private final List table_03 = new ArrayList(); private final List table_04 = new ArrayList(); private final List table_05 = new ArrayList(); + private final List table_06 = new ArrayList(); + private final List table_07 = new ArrayList(); + @Test public void changeTableName() { List tblDef = TableUtils.changeTableName("call_center", "transfer_call_center", table_04); tblDef = TableUtils.stripLocation("call_center", tblDef); - TableUtils.removeTblProperty(MirrorConf.TRANSACTIONAL, tblDef); - System.out.println("Def: "); + TableUtils.removeTblProperty(TRANSACTIONAL, tblDef); +// System.out.println("Def: "); } @Test @@ -50,8 
+55,7 @@ public void testUpdateTableLocation() { String REPLACEMENT_TEST_LOCATION = "hdfs://HOME90/mynew/location"; Boolean rtn = TableUtils.updateTableLocation("hello_manager", working, REPLACEMENT_TEST_LOCATION); assertTrue("Location mismatch: ", TableUtils.getSerdePath("hello_manager", working).equals(REPLACEMENT_TEST_LOCATION)); - System.out.println("Def: "); - +// System.out.println("Def: "); } @Test @@ -66,6 +70,18 @@ public void testTableNameDirMatch_02() { assertTrue(TableUtils.doesTableNameMatchDirectoryName(tblDef)); } + @Test + public void testTableNameDirMatch_03() { + List tblDef = fromStatic(table_06); + assertTrue(TableUtils.doesTableNameMatchDirectoryName(tblDef)); + } + + @Test + public void testTableNameDirMatch_04() { + List tblDef = fromStatic(table_07); + assertTrue(TableUtils.doesTableNameMatchDirectoryName(tblDef)); + } + public List fromStatic(List source) { List rtn = new ArrayList(); for (String line: source) { @@ -79,27 +95,42 @@ public void getLocation() { @Test public void isACID() { - assertTrue(TableUtils.isACID("check_table", table_03)); + EnvironmentTable envTable = new EnvironmentTable(); + envTable.setName("check_table"); + envTable.setDefinition(table_03); + assertTrue(TableUtils.isACID(envTable)); } @Test public void isExternal() { - assertFalse(TableUtils.isExternal("check_table", table_02)); + EnvironmentTable envTable = new EnvironmentTable(); + envTable.setName("check_table"); + envTable.setDefinition(table_02); + assertFalse(TableUtils.isExternal(envTable)); } @Test public void isExternalPurge() { - assertTrue(TableUtils.isExternalPurge("check_table", table_04)); + EnvironmentTable envTable = new EnvironmentTable(); + envTable.setName("check_table"); + envTable.setDefinition(table_04); + assertTrue(TableUtils.isExternalPurge(envTable)); } @Test public void isHMSConverted() { - assertTrue(TableUtils.isHMSConverted("check_table", table_01)); + EnvironmentTable envTable = new EnvironmentTable(); + envTable.setName("check_table"); + envTable.setDefinition(table_01); + assertTrue(TableUtils.isHMSConverted(envTable)); } @Test public void isHive3Standard() { - assertFalse(TableUtils.isHive3Standard("check_table", table_02)); + EnvironmentTable envTable = new EnvironmentTable(); + envTable.setName("check_table"); + envTable.setDefinition(table_02); + assertFalse(TableUtils.isHive3Standard(envTable)); } @Test @@ -109,9 +140,15 @@ public void isLegacyManaged() { @Test public void isManaged() { - assertTrue(TableUtils.isManaged("check_table", table_02)); - assertTrue(TableUtils.isManaged("check_table", table_03)); - assertFalse(TableUtils.isManaged("check_table", table_04)); + EnvironmentTable envTable = new EnvironmentTable(); + envTable.setName("check_table"); + envTable.setDefinition(table_02); + + assertTrue(TableUtils.isManaged(envTable)); + envTable.setDefinition(table_03); + assertTrue(TableUtils.isManaged(envTable)); + envTable.setDefinition(table_04); + assertFalse(TableUtils.isManaged(envTable)); } @Test @@ -125,7 +162,7 @@ public void removeTblProperty() { @Before public void setUp() throws Exception { String[] strTable_01 = new String[]{ - "CREATE EXTERNAL TABLE `tpcds_bin_partitioned_orc_10.call_center`(" + "CREATE EXTERNAL TABLE `tpcds_bin_partitioned_orc_10`.`call_center`(" , " `cc_call_center_sk` bigint, " , " `cc_call_center_id` char(16), " , " `cc_rec_start_date` date, " @@ -167,16 +204,16 @@ public void setUp() throws Exception { , " 'hdfs://HOME90/user/dstreev/datasets/junk'" , "TBLPROPERTIES (" , " 'bucketing_version'='2', " - , " 
'hmsMirror_ConversionStage1'='2020-12-02 08:12:28', " - , " 'hmsMirror_LegacyManaged'='true', " - , " 'hmsMirror_Converted'='true', " + , " 'hms-mirror_ConversionStage1'='2020-12-02 08:12:28', " + , " 'hms-mirror_LegacyManaged'='true', " + , " 'hms-mirror_Converted'='true', " , " 'last_modified_by'='dstreev', " , " 'last_modified_time'='1606919590', " , " 'transient_lastDdlTime'='1606919590')" }; table_01.addAll(Arrays.asList(strTable_01)); String[] strTable_02 = new String[]{ - "CREATE TABLE `tpcds_bin_partitioned_orc_10.call_center`(" + "CREATE TABLE `tpcds_bin_partitioned_orc_10`.`call_center`(" , " `cc_call_center_sk` bigint, " , " `cc_call_center_id` char(16), " , " `cc_rec_start_date` date, " @@ -218,16 +255,16 @@ public void setUp() throws Exception { , " 'hdfs://HOME90/user/dstreev/datasets/junk'" , "TBLPROPERTIES (" , " 'bucketing_version'='2', " - , " 'hmsMirror_ConversionStage1'='2020-12-02 08:12:28', " - , " 'hmsMirror_LegacyManaged'='true', " - , " 'hmsMirror_Converted'='true', " + , " 'hms-mirror_ConversionStage1'='2020-12-02 08:12:28', " + , " 'hms-mirror_LegacyManaged'='true', " + , " 'hms-mirror_Converted'='true', " , " 'last_modified_by'='dstreev', " , " 'last_modified_time'='1606919590', " , " 'transient_lastDdlTime'='1606919590')" }; table_02.addAll(Arrays.asList(strTable_02)); String[] strTable_03 = new String[]{ - "CREATE TABLE `tpcds_bin_partitioned_orc_10.call_center`(" + "CREATE TABLE `tpcds_bin_partitioned_orc_10`.`call_center`(" , " `cc_call_center_sk` bigint, " , " `cc_call_center_id` char(16), " , " `cc_rec_start_date` date, " @@ -270,16 +307,16 @@ public void setUp() throws Exception { , "TBLPROPERTIES (" , " 'bucketing_version'='2', " , " 'transactional'='true', " - , " 'hmsMirror_ConversionStage1'='2020-12-02 08:12:28', " - , " 'hmsMirror_LegacyManaged'='true', " - , " 'hmsMirror_Converted'='true', " + , " 'hms-mirror_ConversionStage1'='2020-12-02 08:12:28', " + , " 'hms-mirror_LegacyManaged'='true', " + , " 'hms-mirror_Converted'='true', " , " 'last_modified_by'='dstreev', " , " 'last_modified_time'='1606919590', " , " 'transient_lastDdlTime'='1606919590')" }; table_03.addAll(Arrays.asList(strTable_03)); String[] strTable_04 = new String[]{ - "CREATE EXTERNAL TABLE `tpcds_bin_partitioned_orc_10.call_center`(" + "CREATE EXTERNAL TABLE `tpcds_bin_partitioned_orc_10`.`call_center`(" , " `cc_call_center_sk` bigint, " , " `cc_call_center_id` char(16), " , " `cc_rec_start_date` date, " @@ -323,9 +360,9 @@ public void setUp() throws Exception { , " 'bucketing_version'='2', " , " 'transactional'='true', " , " 'external.table.purge'='true', " - , " 'hmsMirror_ConversionStage1'='2020-12-02 08:12:28', " - , " 'hmsMirror_LegacyManaged'='true', " - , " 'hmsMirror_Converted'='true', " + , " 'hms-mirror_ConversionStage1'='2020-12-02 08:12:28', " + , " 'hms-mirror_LegacyManaged'='true', " + , " 'hms-mirror_Converted'='true', " , " 'last_modified_by'='dstreev', " , " 'last_modified_time'='1606919590', " , " 'transient_lastDdlTime'='1606919590')" @@ -346,9 +383,9 @@ public void setUp() throws Exception { , "LOCATION " , "'hdfs://HDP50/apps/warehouse/hive/merge_files_migrate.db/hello_manager' " , "TBLPROPERTIES ( " - , "'hmsMirror_Metadata_Stage1'='2022-05-26 00:55:38', " - , "'hmsMirror_LegacyManaged'='true', " - , "'hmsMirror_Converted'='true', " + , "'hms-mirror_Metadata_Stage1'='2022-05-26 00:55:38', " + , "'hms-mirror_LegacyManaged'='true', " + , "'hms-mirror_Converted'='true', " , "'external.table.purge'='true', " , " 'spark.sql.create.version'='2.3.0.2.6.5.0-292', " , 
" 'spark.sql.sources.provider'='orc', " @@ -360,6 +397,113 @@ public void setUp() throws Exception { }; table_05.addAll(Arrays.asList(strTable_05)); + String[] strTable_06 = new String[]{ + "CREATE EXTERNAL TABLE `call_center`(" + , " `cc_call_center_sk` bigint, " + , " `cc_call_center_id` char(16), " + , " `cc_rec_start_date` date, " + , " `cc_rec_end_date` date, " + , " `cc_closed_date_sk` bigint, " + , " `cc_open_date_sk` bigint, " + , " `cc_name` varchar(50), " + , " `cc_class` varchar(50), " + , " `cc_employees` int, " + , " `cc_sq_ft` int, " + , " `cc_hours` char(20), " + , " `cc_manager` varchar(40), " + , " `cc_mkt_id` int, " + , " `cc_mkt_class` char(50), " + , " `cc_mkt_desc` varchar(100), " + , " `cc_market_manager` varchar(40), " + , " `cc_division` int, " + , " `cc_division_name` varchar(50), " + , " `cc_company` int, " + , " `cc_company_name` char(50), " + , " `cc_street_number` char(10), " + , " `cc_street_name` varchar(60), " + , " `cc_street_type` char(15), " + , " `cc_suite_number` char(10), " + , " `cc_city` varchar(60), " + , " `cc_county` varchar(30), " + , " `cc_state` char(2), " + , " `cc_zip` char(10), " + , " `cc_country` varchar(20), " + , " `cc_gmt_offset` decimal(5,2), " + , " `cc_tax_percentage` decimal(5,2))" + , "ROW FORMAT SERDE " + , " 'org.apache.hadoop.hive.ql.io.orc.OrcSerde' " + , "STORED AS INPUTFORMAT " + , " 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' " + , "OUTPUTFORMAT " + , " 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'" + , "LOCATION" + , " 'hdfs://HOME90/user/dstreev/datasets/call_center'" + , "TBLPROPERTIES (" + , " 'bucketing_version'='2', " + , " 'transactional'='true', " + , " 'external.table.purge'='true', " + , " 'hms-mirror_ConversionStage1'='2020-12-02 08:12:28', " + , " 'hms-mirror_LegacyManaged'='true', " + , " 'hms-mirror_Converted'='true', " + , " 'last_modified_by'='dstreev', " + , " 'last_modified_time'='1606919590', " + , " 'transient_lastDdlTime'='1606919590')" + }; + table_06.addAll(Arrays.asList(strTable_06)); + + String[] strTable_07 = new String[]{ + "CREATE EXTERNAL TABLE call_center(" + , " `cc_call_center_sk` bigint, " + , " `cc_call_center_id` char(16), " + , " `cc_rec_start_date` date, " + , " `cc_rec_end_date` date, " + , " `cc_closed_date_sk` bigint, " + , " `cc_open_date_sk` bigint, " + , " `cc_name` varchar(50), " + , " `cc_class` varchar(50), " + , " `cc_employees` int, " + , " `cc_sq_ft` int, " + , " `cc_hours` char(20), " + , " `cc_manager` varchar(40), " + , " `cc_mkt_id` int, " + , " `cc_mkt_class` char(50), " + , " `cc_mkt_desc` varchar(100), " + , " `cc_market_manager` varchar(40), " + , " `cc_division` int, " + , " `cc_division_name` varchar(50), " + , " `cc_company` int, " + , " `cc_company_name` char(50), " + , " `cc_street_number` char(10), " + , " `cc_street_name` varchar(60), " + , " `cc_street_type` char(15), " + , " `cc_suite_number` char(10), " + , " `cc_city` varchar(60), " + , " `cc_county` varchar(30), " + , " `cc_state` char(2), " + , " `cc_zip` char(10), " + , " `cc_country` varchar(20), " + , " `cc_gmt_offset` decimal(5,2), " + , " `cc_tax_percentage` decimal(5,2))" + , "ROW FORMAT SERDE " + , " 'org.apache.hadoop.hive.ql.io.orc.OrcSerde' " + , "STORED AS INPUTFORMAT " + , " 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' " + , "OUTPUTFORMAT " + , " 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'" + , "LOCATION" + , " 'hdfs://HOME90/user/dstreev/datasets/call_center'" + , "TBLPROPERTIES (" + , " 'bucketing_version'='2', " + , " 'transactional'='true', " + , " 
'external.table.purge'='true', " + , " 'hms-mirror_ConversionStage1'='2020-12-02 08:12:28', " + , " 'hms-mirror_LegacyManaged'='true', " + , " 'hms-mirror_Converted'='true', " + , " 'last_modified_by'='dstreev', " + , " 'last_modified_time'='1606919590', " + , " 'transient_lastDdlTime'='1606919590')" + }; + table_07.addAll(Arrays.asList(strTable_07)); }
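
The test changes above all follow one pattern: the per-class @Before/@After/@AfterClass boilerplate is consolidated into MirrorTestBase, each subclass now supplies only its cluster configuration via super.init(...), and teardown is gated on both the cleanup and execute flags so a dry run cannot drop the shared test dataset. Below is a minimal, self-contained sketch of that lifecycle pattern, assuming JUnit 4 on the classpath; the class names and flag accessors are illustrative stand-ins, not the project's actual API.

    import org.junit.After;
    import org.junit.Before;
    import org.junit.Test;

    import static org.junit.Assert.assertNotNull;

    // Stand-in for MirrorTestBase: the base class owns the test lifecycle;
    // subclasses supply only the configuration they run against.
    abstract class LifecycleTestBase {
        protected String config;

        // Subclasses call this from their own @Before method.
        public void init(String cfg) throws Exception {
            this.config = cfg; // stand-in for DataState configuration wiring
        }

        @After
        public void tearDown() throws Exception {
            // Cleanup is gated on BOTH flags, so a non-execute (dry) run
            // leaves the shared test dataset in place.
            if (isCleanUp() && isExecute()) {
                // dataCleanup(DATACLEANUP.RIGHT) would run here in the real base class.
            }
        }

        // Hypothetical flag accessors standing in for DataState.getInstance().
        protected boolean isCleanUp() { return true; }
        protected boolean isExecute() { return false; }
    }

    public class ExampleMigrationTest extends LifecycleTestBase {
        @Before
        public void init() throws Exception {
            super.init("HDP2_CDP"); // each subclass supplies only its config key
        }

        @Test
        public void test_example() {
            assertNotNull(config);
        }
    }

The same inversion drives the TableUtils changes in the last hunks: free-floating (name, definition) argument pairs are replaced by a single EnvironmentTable object, so the table's identity and definition travel together through the API.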