diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index b91fa381f..3684867e2 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -33,12 +33,12 @@ jobs: distribution: temurin java-version: ${{matrix.java}} - run: java -version - - run: .kokoro/build.sh - env: - JOB_TYPE: test - units-java8: - # Building using Java 17 and run the tests with Java 8 runtime - name: "units (8)" + - run: mvn -B -ntp test + # TODO: remove in the target repo + working-directory: google-cloud-bigtable-kafka-connect-sink + units-java11: + # Building using Java 21 and run the tests with Java 11 runtime + name: "units (11)" runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 @@ -53,71 +53,79 @@ jobs: shell: bash - uses: actions/setup-java@v4 with: - java-version: 17 + java-version: 21 distribution: temurin - - run: .kokoro/build.sh - env: - JOB_TYPE: test - windows: - runs-on: windows-latest + - run: mvn -B -ntp test + # TODO: remove in the target repo + working-directory: google-cloud-bigtable-kafka-connect-sink + integrations: + runs-on: ubuntu-latest steps: - - name: Support longpaths - run: git config --system core.longpaths true - uses: actions/checkout@v4 - uses: actions/setup-java@v4 with: distribution: temurin - java-version: 8 + java-version: 11 - run: java -version - - run: .kokoro/build.bat - env: - JOB_TYPE: test - dependencies: + - name: 'Set up Cloud SDK' + uses: 'google-github-actions/setup-gcloud@v2' + with: + version: latest + install_components: beta,bigtable + - run: | + set -euo pipefail + gcloud beta emulators bigtable start --host-port=0.0.0.0:8086 & + mvn -B -ntp verify -DskipUnitTests + # TODO: remove in the target repo + working-directory: google-cloud-bigtable-kafka-connect-sink + package: runs-on: ubuntu-latest - strategy: - matrix: - java: [17] steps: - uses: actions/checkout@v4 - uses: actions/setup-java@v4 with: distribution: temurin - java-version: ${{matrix.java}} + java-version: 11 - run: java -version - - run: .kokoro/dependencies.sh - javadoc: - runs-on: ubuntu-latest + - run: mvn -B -ntp package -Dmaven.test.skip + # TODO: remove in the target repo + working-directory: google-cloud-bigtable-kafka-connect-sink + windows: + runs-on: windows-latest steps: + - name: Support longpaths + run: git config --system core.longpaths true - uses: actions/checkout@v4 - uses: actions/setup-java@v4 with: distribution: temurin - java-version: 17 + java-version: 11 - run: java -version - - run: .kokoro/build.sh - env: - JOB_TYPE: javadoc - lint: + - run: mvn -B -ntp test + # TODO: remove in the target repo + working-directory: google-cloud-bigtable-kafka-connect-sink + javadoc: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - uses: actions/setup-java@v4 with: distribution: temurin - java-version: 11 + java-version: 17 - run: java -version - - run: .kokoro/build.sh - env: - JOB_TYPE: lint - clirr: + # TODO: also run javadoc:test-javadoc? 
+ - run: mvn -B -ntp javadoc:javadoc + # TODO: remove in the target repo + working-directory: google-cloud-bigtable-kafka-connect-sink + lint: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - uses: actions/setup-java@v4 with: distribution: temurin - java-version: 8 + java-version: 11 - run: java -version - - run: .kokoro/build.sh - env: - JOB_TYPE: clirr + - run: mvn -B -ntp spotless:check + # TODO: remove in the target repo + working-directory: google-cloud-bigtable-kafka-connect-sink diff --git a/google-cloud-bigtable-kafka-connect-sink/.gitignore b/google-cloud-bigtable-kafka-connect-sink/.gitignore new file mode 100644 index 000000000..612c5bc96 --- /dev/null +++ b/google-cloud-bigtable-kafka-connect-sink/.gitignore @@ -0,0 +1,3 @@ +target +.idea +*.iml diff --git a/google-cloud-bigtable-kafka-connect-sink/LICENSE b/google-cloud-bigtable-kafka-connect-sink/LICENSE new file mode 100644 index 000000000..261eeb9e9 --- /dev/null +++ b/google-cloud-bigtable-kafka-connect-sink/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. 
The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/google-cloud-bigtable-kafka-connect-sink/assembly_package.xml b/google-cloud-bigtable-kafka-connect-sink/assembly_package.xml new file mode 100644 index 000000000..1a4de4100 --- /dev/null +++ b/google-cloud-bigtable-kafka-connect-sink/assembly_package.xml @@ -0,0 +1,18 @@ + + package + + dir + + false + + + / + true + true + runtime + false + + + \ No newline at end of file diff --git a/google-cloud-bigtable-kafka-connect-sink/license.header b/google-cloud-bigtable-kafka-connect-sink/license.header new file mode 100644 index 000000000..370494894 --- /dev/null +++ b/google-cloud-bigtable-kafka-connect-sink/license.header @@ -0,0 +1,15 @@ +/* + * Copyright $YEAR Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ diff --git a/google-cloud-bigtable-kafka-connect-sink/pom.xml b/google-cloud-bigtable-kafka-connect-sink/pom.xml new file mode 100644 index 000000000..30340f29e --- /dev/null +++ b/google-cloud-bigtable-kafka-connect-sink/pom.xml @@ -0,0 +1,279 @@ + + + 4.0.0 + com.google.cloud.kafka.connect.bigtable + sink + 1.0.0-SNAPSHOT + jar + kafka-connect-bigtable-sink + + Google Bigtable sink connector for Apache Kafka Connect + + + 26.31.0 + 3.8.1 + 2.12 + 2.6.1 + 1.7.36 + 2.16.2 + + 5.14.2 + 4.13.2 + + 5.11.3 + 2.43.0 + 1.19.2 + 3.11.2 + 0.8.12 + 3.5.2 + 3.5.2 + 3.4.2 + + 11 + 11 + UTF-8 + + + + + com.google.cloud + libraries-bom + ${google.cloud.bom.version} + pom + import + + + + + + org.apache.kafka + connect-api + ${kafka.version} + provided + + + org.apache.kafka + connect-runtime + ${kafka.version} + provided + + + org.apache.kafka + kafka-clients + ${kafka.version} + + + com.google.cloud + google-cloud-bigtable + + + org.apache.hbase + hbase-common + ${hbase.version} + + + org.slf4j + slf4j-api + ${slf4j.version} + + + org.slf4j + slf4j-simple + ${slf4j.version} + + + com.fasterxml.jackson.core + jackson-databind + ${jackson.version} + + + junit + junit + ${junit.version} + test + + + org.apache.kafka + connect-runtime + ${kafka.version} + test + test + + + org.apache.kafka + kafka-clients + ${kafka.version} + test + test-jar + test + + + org.apache.kafka + kafka_${kafka.scala.version} + ${kafka.version} + test + + + org.apache.kafka + kafka_${kafka.scala.version} + ${kafka.version} + test + test-jar + test + + + org.mockito + mockito-core + ${mockito.version} + test + + + org.junit.jupiter + junit-jupiter-api + ${junit.jupiter.version} + test + + + + + + + org.apache.maven.plugins + maven-assembly-plugin + + + assembly_package.xml + + + + + make-assembly + package + + single + + + + + + com.diffplug.spotless + spotless-maven-plugin + ${spotless.version} + + + + + .gitignore + + + + + true + 4 + + + + + + ${google.java.format.version} + + true + true + + + ${project.basedir}/license.header + + + + + + + org.apache.maven.plugins + maven-surefire-plugin + ${surefire.version} + + + ${skipUnitTests} + false + + classes + true + + + target/test-classes/fake_service_key.json + + + + + org.apache.maven.plugins + maven-failsafe-plugin + ${failsafe.version} + + + ${skipIntegrationTests} + false + + classes + true + + + target/test-classes/fake_service_key.json + + localhost:8086 + + + + + + verify + integration-test + + + + + + org.apache.maven.plugins + maven-jar-plugin + ${maven-jar.version} + + + + true + + + + + + org.jacoco + jacoco-maven-plugin + ${jacoco.version} + + + + prepare-agent + + + + + + + + + + org.apache.maven.plugins + maven-javadoc-plugin + ${javadoc.version} + + none + protected + true + + + + + \ No newline at end of file diff --git a/google-cloud-bigtable-kafka-connect-sink/src/main/java/com/google/cloud/kafka/connect/bigtable/BigtableSinkConnector.java b/google-cloud-bigtable-kafka-connect-sink/src/main/java/com/google/cloud/kafka/connect/bigtable/BigtableSinkConnector.java new file mode 100644 index 000000000..183686c6e --- /dev/null +++ b/google-cloud-bigtable-kafka-connect-sink/src/main/java/com/google/cloud/kafka/connect/bigtable/BigtableSinkConnector.java @@ -0,0 +1,83 @@ +/* + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.google.cloud.kafka.connect.bigtable; + +import com.google.cloud.kafka.connect.bigtable.config.BigtableSinkConfig; +import com.google.cloud.kafka.connect.bigtable.config.BigtableSinkTaskConfig; +import com.google.cloud.kafka.connect.bigtable.version.PackageMetadata; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import org.apache.kafka.common.config.Config; +import org.apache.kafka.common.config.ConfigDef; +import org.apache.kafka.connect.connector.Task; +import org.apache.kafka.connect.sink.SinkConnector; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** {@link SinkConnector} class for Cloud Bigtable. */ +public class BigtableSinkConnector extends SinkConnector { + private Map configProperties; + private final Logger logger = LoggerFactory.getLogger(BigtableSinkConnector.class); + + @Override + public ConfigDef config() { + logger.trace("config()"); + return BigtableSinkConfig.getDefinition(); + } + + @Override + public Config validate(Map properties) { + logger.trace("validate()"); + return BigtableSinkConfig.validate(properties); + } + + @Override + public void start(Map props) { + logger.trace("start()"); + configProperties = props; + } + + @Override + public void stop() { + logger.trace("stop()"); + } + + @Override + public Class taskClass() { + logger.trace("taskClass()"); + return BigtableSinkTask.class; + } + + @Override + public List> taskConfigs(int maxTasks) { + logger.trace("taskClass({})", maxTasks); + List> configs = new ArrayList<>(maxTasks); + for (int i = 0; i < maxTasks; i++) { + Map config = new HashMap<>(configProperties); + config.put(BigtableSinkTaskConfig.CONFIG_TASK_ID, Integer.toString(i)); + configs.add(config); + } + return configs; + } + + @Override + public String version() { + logger.trace("version()"); + return PackageMetadata.getVersion(); + } +} diff --git a/google-cloud-bigtable-kafka-connect-sink/src/main/java/com/google/cloud/kafka/connect/bigtable/BigtableSinkTask.java b/google-cloud-bigtable-kafka-connect-sink/src/main/java/com/google/cloud/kafka/connect/bigtable/BigtableSinkTask.java new file mode 100644 index 000000000..e474518e5 --- /dev/null +++ b/google-cloud-bigtable-kafka-connect-sink/src/main/java/com/google/cloud/kafka/connect/bigtable/BigtableSinkTask.java @@ -0,0 +1,471 @@ +/* + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.google.cloud.kafka.connect.bigtable; + +import com.google.api.core.ApiFuture; +import com.google.api.core.ApiFutures; +import com.google.api.gax.batching.Batcher; +import com.google.api.gax.rpc.ApiException; +import com.google.cloud.bigtable.admin.v2.BigtableTableAdminClient; +import com.google.cloud.bigtable.data.v2.BigtableDataClient; +import com.google.cloud.bigtable.data.v2.models.ConditionalRowMutation; +import com.google.cloud.bigtable.data.v2.models.Filters; +import com.google.cloud.bigtable.data.v2.models.RowMutationEntry; +import com.google.cloud.kafka.connect.bigtable.autocreate.BigtableSchemaManager; +import com.google.cloud.kafka.connect.bigtable.autocreate.ResourceCreationResult; +import com.google.cloud.kafka.connect.bigtable.config.BigtableErrorMode; +import com.google.cloud.kafka.connect.bigtable.config.BigtableSinkConfig; +import com.google.cloud.kafka.connect.bigtable.config.BigtableSinkTaskConfig; +import com.google.cloud.kafka.connect.bigtable.exception.BatchException; +import com.google.cloud.kafka.connect.bigtable.exception.InvalidBigtableSchemaModificationException; +import com.google.cloud.kafka.connect.bigtable.mapping.KeyMapper; +import com.google.cloud.kafka.connect.bigtable.mapping.MutationData; +import com.google.cloud.kafka.connect.bigtable.mapping.MutationDataBuilder; +import com.google.cloud.kafka.connect.bigtable.mapping.ValueMapper; +import com.google.cloud.kafka.connect.bigtable.version.PackageMetadata; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.Lists; +import com.google.protobuf.ByteString; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.Future; +import java.util.stream.Collectors; +import org.apache.kafka.connect.errors.ConnectException; +import org.apache.kafka.connect.errors.DataException; +import org.apache.kafka.connect.sink.ErrantRecordReporter; +import org.apache.kafka.connect.sink.SinkRecord; +import org.apache.kafka.connect.sink.SinkTask; +import org.apache.kafka.connect.sink.SinkTaskContext; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * {@link SinkTask} class used by {@link org.apache.kafka.connect.sink.SinkConnector} to write to + * Cloud Bigtable. + */ +public class BigtableSinkTask extends SinkTask { + private BigtableSinkTaskConfig config; + private BigtableDataClient bigtableData; + private BigtableTableAdminClient bigtableAdmin; + private KeyMapper keyMapper; + private ValueMapper valueMapper; + private BigtableSchemaManager schemaManager; + @VisibleForTesting protected final Map> batchers; + @VisibleForTesting protected Logger logger = LoggerFactory.getLogger(BigtableSinkTask.class); + + /** + * A default empty constructor. Initialization methods such as {@link BigtableSinkTask#start(Map)} + * or {@link SinkTask#initialize(SinkTaskContext)} must be called before {@link + * BigtableSinkTask#put(Collection)} can be called. Kafka Connect handles it well. + */ + public BigtableSinkTask() { + this(null, null, null, null, null, null, null); + } + + // A constructor only used by the tests. 
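+ // In production the public no-argument constructor is used instead; the collaborators below are
+ // then created in start() from the task configuration.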
+ @VisibleForTesting + protected BigtableSinkTask( + BigtableSinkTaskConfig config, + BigtableDataClient bigtableData, + BigtableTableAdminClient bigtableAdmin, + KeyMapper keyMapper, + ValueMapper valueMapper, + BigtableSchemaManager schemaManager, + SinkTaskContext context) { + this.config = config; + this.bigtableData = bigtableData; + this.bigtableAdmin = bigtableAdmin; + this.keyMapper = keyMapper; + this.valueMapper = valueMapper; + this.schemaManager = schemaManager; + this.context = context; + this.batchers = new HashMap<>(); + } + + @Override + public void start(Map props) { + config = new BigtableSinkTaskConfig(props); + logger = + LoggerFactory.getLogger( + BigtableSinkTask.class.getName() + + config.getInt(BigtableSinkTaskConfig.CONFIG_TASK_ID)); + bigtableData = config.getBigtableDataClient(); + bigtableAdmin = config.getBigtableAdminClient(); + keyMapper = + new KeyMapper( + config.getString(BigtableSinkTaskConfig.CONFIG_ROW_KEY_DELIMITER), + config.getList(BigtableSinkTaskConfig.CONFIG_ROW_KEY_DEFINITION)); + valueMapper = + new ValueMapper( + config.getString(BigtableSinkTaskConfig.CONFIG_DEFAULT_COLUMN_FAMILY), + config.getString(BigtableSinkTaskConfig.CONFIG_DEFAULT_COLUMN_QUALIFIER), + config.getNullValueMode()); + schemaManager = new BigtableSchemaManager(bigtableAdmin); + } + + @Override + public void stop() { + logger.trace("stop()"); + try { + Iterable> batcherCloses = + batchers.values().stream().map(Batcher::closeAsync).collect(Collectors.toList()); + ApiFutures.allAsList(batcherCloses).get(); + } catch (ExecutionException | InterruptedException e) { + logger.warn("Error closing Cloud Bigtable batchers.", e); + } finally { + batchers.clear(); + } + if (bigtableAdmin != null) { + try { + bigtableAdmin.close(); + } catch (RuntimeException e) { + logger.warn("Error closing Cloud Bigtable admin client.", e); + } + } + if (bigtableData != null) { + try { + bigtableData.close(); + } catch (RuntimeException e) { + logger.warn("Error closing Cloud Bigtable data client.", e); + } + } + } + + @Override + public String version() { + logger.trace("version()"); + return PackageMetadata.getVersion(); + } + + @Override + public void put(Collection records) { + logger.trace("put(#records={})", records.size()); + if (records.isEmpty()) { + return; + } + + Map mutations = prepareRecords(records); + if (config.getBoolean(BigtableSinkTaskConfig.CONFIG_AUTO_CREATE_TABLES)) { + mutations = autoCreateTablesAndHandleErrors(mutations); + } + if (config.getBoolean(BigtableSinkTaskConfig.CONFIG_AUTO_CREATE_COLUMN_FAMILIES)) { + mutations = autoCreateColumnFamiliesAndHandleErrors(mutations); + } + + Map> perRecordResults = new HashMap<>(); + switch (config.getInsertMode()) { + case INSERT: + insertRows(mutations, perRecordResults); + break; + case UPSERT: + upsertRows(mutations, perRecordResults); + break; + } + handleResults(perRecordResults); + } + + /** + * Generate mutations for input records. + * + * @param records Input records. + * @return {@link Map} containing input records and corresponding mutations that need to be + * applied. 
+ */ + @VisibleForTesting + Map prepareRecords(Collection records) { + Map mutations = new HashMap<>(); + for (SinkRecord record : records) { + try { + Optional maybeRecordMutationData = createRecordMutationData(record); + if (maybeRecordMutationData.isPresent()) { + mutations.put(record, maybeRecordMutationData.get()); + } else { + logger.debug("Skipped a record that maps to an empty value."); + } + } catch (Throwable t) { + reportError(record, t); + } + } + return mutations; + } + + /** + * Generate mutation for a single input record. + * + * @param record Input record. + * @return {@link Optional#empty()} if the input record requires no write to Cloud Bigtable, + * {@link Optional} containing mutation that it needs to be written to Cloud Bigtable + * otherwise. + */ + @VisibleForTesting + Optional createRecordMutationData(SinkRecord record) { + String recordTableId = getTableName(record); + ByteString rowKey = ByteString.copyFrom(keyMapper.getKey(record.key())); + if (rowKey.isEmpty()) { + throw new DataException( + "The record's key converts into an illegal empty Cloud Bigtable row key."); + } + long timestamp = getTimestampMicros(record); + MutationDataBuilder mutationDataBuilder = + valueMapper.getRecordMutationDataBuilder(record.value(), timestamp); + return mutationDataBuilder.maybeBuild(recordTableId, rowKey); + } + + /** + * Get table name the input record's mutation will be written to. + * + * @param record Input record. + * @return Cloud Bigtable table name the input record's mutation will be written to. + */ + @VisibleForTesting + String getTableName(SinkRecord record) { + return config + .getString(BigtableSinkTaskConfig.CONFIG_TABLE_NAME_FORMAT) + .replace("${topic}", record.topic()); + } + + /** + * Get timestamp the input record's mutation's timestamp. + * + * @param record Input record. + * @return UNIX timestamp in microseconds. + */ + @VisibleForTesting + long getTimestampMicros(SinkRecord record) { + // From reading the Java Cloud Bigtable client, it looks that the only usable timestamp + // granularity is the millisecond one. So we assume it. + // There's a test that will break when it starts supporting microsecond granularity with a note + // to modify this function then. + Long timestampMillis = record.timestamp(); + if (timestampMillis == null) { + // The timestamp might be null if the kafka cluster is old (<= v0.9): + // https://github.com/apache/kafka/blob/f9615ed275c3856b73e5b6083049a8def9f59697/clients/src/main/java/org/apache/kafka/clients/consumer/ConsumerRecord.java#L49 + // In such a case, we default to wall clock time as per the design doc. + logger.debug("Used wall clock for a record missing timestamp."); + timestampMillis = System.currentTimeMillis(); + } + return 1000 * timestampMillis; + } + + /** + * Report error as described in {@link BigtableSinkConfig#getDefinition()}. + * + * @param record Input record whose processing caused an error. + * @param throwable The error. + */ + @VisibleForTesting + void reportError(SinkRecord record, Throwable throwable) { + ErrantRecordReporter reporter; + /// We get a reference to `reporter` using a procedure described in javadoc of + /// {@link SinkTaskContext#errantRecordReporter()} that guards against old Kafka versions. 
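+ // Note: SinkTaskContext#errantRecordReporter() only exists on Kafka Connect 2.6+ (KIP-610) and
+ // may return null when no error reporter (such as a dead letter queue topic) is configured,
+ // hence the defensive lookup here and the fallback to the configured error mode below.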
+ try { + reporter = context.errantRecordReporter(); + } catch (NoSuchMethodError | NoClassDefFoundError ignored) { + reporter = null; + } + if (reporter != null) { + reporter.report(record, throwable); + logger.warn( + "Used DLQ for reporting a problem with a record (throwableClass={}).", + throwable.getClass().getSimpleName()); + } else { + BigtableErrorMode errorMode = config.getBigtableErrorMode(); + switch (errorMode) { + case IGNORE: + break; + case WARN: + logger.warn("Processing of a record with key {} failed", record.key(), throwable); + break; + case FAIL: + throw new BatchException(throwable); + } + } + } + + /** + * Attempts to create Cloud Bigtable tables so that all the mutations can be applied and handles + * errors. + * + * @param mutations Input records and corresponding mutations. + * @return Subset of the input argument containing only those record for which the target Cloud + * Bigtable tables exist. + */ + @VisibleForTesting + Map autoCreateTablesAndHandleErrors( + Map mutations) { + Map okMutations = new HashMap<>(mutations); + ResourceCreationResult resourceCreationResult = schemaManager.ensureTablesExist(okMutations); + String errorMessage = "Table auto-creation failed."; + for (SinkRecord record : resourceCreationResult.getBigtableErrors()) { + reportError(record, new ConnectException(errorMessage)); + okMutations.remove(record); + } + for (SinkRecord record : resourceCreationResult.getDataErrors()) { + reportError(record, new InvalidBigtableSchemaModificationException(errorMessage)); + okMutations.remove(record); + } + return okMutations; + } + + /** + * Attempts to create Cloud Bigtable column families so that all the mutations can be applied and + * handles errors. + * + * @param mutations Input records and corresponding mutations. + * @return Subset of the input argument containing only those record for which the target Cloud + * Bigtable column families exist. + */ + @VisibleForTesting + Map autoCreateColumnFamiliesAndHandleErrors( + Map mutations) { + Map okMutations = new HashMap<>(mutations); + ResourceCreationResult resourceCreationResult = + schemaManager.ensureColumnFamiliesExist(okMutations); + String errorMessage = "Column family auto-creation failed."; + for (SinkRecord record : resourceCreationResult.getBigtableErrors()) { + reportError(record, new ConnectException(errorMessage)); + okMutations.remove(record); + } + for (SinkRecord record : resourceCreationResult.getDataErrors()) { + reportError(record, new InvalidBigtableSchemaModificationException(errorMessage)); + okMutations.remove(record); + } + return okMutations; + } + + /** + * Applies the mutations using upserts. + * + * @param mutations Mutations to be applied. + * @param perRecordResults {@link Map} the per-record results will be written to. + */ + @VisibleForTesting + void upsertRows( + Map mutations, Map> perRecordResults) { + List> mutationsToApply = + new ArrayList<>(mutations.entrySet()); + int maxBatchSize = config.getInt(BigtableSinkTaskConfig.CONFIG_MAX_BATCH_SIZE); + List>> batches = + Lists.partition(mutationsToApply, maxBatchSize); + + try { + for (List> batch : batches) { + performUpsertBatch(batch, perRecordResults); + } + } finally { + for (Batcher b : batchers.values()) { + // We flush the batchers to ensure that no unsent requests remain in the batchers + // after this method returns to make the behavior more predictable. + // We flush asynchronously and await the results instead. + b.sendOutstanding(); + } + } + } + + /** + * Applies a single mutation batch using upserts. 
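+ * <p>Entries are added to a per-table {@link Batcher} and {@code sendOutstanding()} is called on
+ * every batcher before this method returns, so that the configured maximum batch size is
+ * respected.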
+ * + * @param batch Batch of mutations to be applied. + * @param perRecordResults A {@link Map} the per-record results will be written to. + */ + @VisibleForTesting + void performUpsertBatch( + List> batch, + Map> perRecordResults) { + logger.trace("upsertBatch(#records={})", batch.size()); + for (Map.Entry recordEntry : batch) { + SinkRecord record = recordEntry.getKey(); + MutationData recordMutationData = recordEntry.getValue(); + String recordTableName = recordMutationData.getTargetTable(); + + Batcher batcher = + batchers.computeIfAbsent(recordTableName, (k) -> bigtableData.newBulkMutationBatcher(k)); + perRecordResults.put(record, batcher.add(recordMutationData.getUpsertMutation())); + } + for (Batcher batcher : batchers.values()) { + // We must flush the batchers to respect CONFIG_MAX_BATCH_SIZE. + // We flush asynchronously and await the results instead. + batcher.sendOutstanding(); + } + } + + /** + * Applies the mutations using inserts. + * + *
Note that no batching is used. + * + * @param mutations Mutations to be applied. + * @param perRecordResults {@link Map} the per-record results will be written to. + */ + @VisibleForTesting + void insertRows( + Map mutations, Map> perRecordResults) { + logger.trace("insertRows(#records={})", mutations.size()); + for (Map.Entry recordEntry : mutations.entrySet()) { + // We keep compatibility with Confluent's sink and disallow batching operations that check if + // the row already exists. + SinkRecord record = recordEntry.getKey(); + MutationData recordMutationData = recordEntry.getValue(); + ConditionalRowMutation insert = + // We want to perform the mutation if and only if the row does not already exist. + ConditionalRowMutation.create( + recordMutationData.getTargetTable(), recordMutationData.getRowKey()) + // We first check if any cell of this row exists... + .condition(Filters.FILTERS.pass()) + // ... and perform the mutation only if no cell exists. + .otherwise(recordMutationData.getInsertMutation()); + boolean insertSuccessful; + Optional exceptionThrown = Optional.empty(); + try { + insertSuccessful = !bigtableData.checkAndMutateRow(insert); + } catch (ApiException e) { + insertSuccessful = false; + exceptionThrown = Optional.of(e); + } + perRecordResults.put( + record, + insertSuccessful + ? CompletableFuture.completedFuture(null) + : CompletableFuture.failedFuture( + exceptionThrown.orElse( + new ConnectException("Insert failed since the row already existed.")))); + } + } + + /** + * Handles results of the whole operation. + * + * @param perRecordResults Results to be handled. + */ + @VisibleForTesting + void handleResults(Map> perRecordResults) { + logger.trace("handleResults(#records={})", perRecordResults.size()); + for (Map.Entry> recordResult : perRecordResults.entrySet()) { + try { + recordResult.getValue().get(); + } catch (ExecutionException | InterruptedException e) { + SinkRecord record = recordResult.getKey(); + reportError(record, e); + } + } + } +} diff --git a/google-cloud-bigtable-kafka-connect-sink/src/main/java/com/google/cloud/kafka/connect/bigtable/autocreate/BigtableSchemaManager.java b/google-cloud-bigtable-kafka-connect-sink/src/main/java/com/google/cloud/kafka/connect/bigtable/autocreate/BigtableSchemaManager.java new file mode 100644 index 000000000..7db1f3110 --- /dev/null +++ b/google-cloud-bigtable-kafka-connect-sink/src/main/java/com/google/cloud/kafka/connect/bigtable/autocreate/BigtableSchemaManager.java @@ -0,0 +1,470 @@ +/* + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.google.cloud.kafka.connect.bigtable.autocreate; + +import com.google.api.core.ApiFuture; +import com.google.api.gax.rpc.ApiException; +import com.google.api.gax.rpc.StatusCode; +import com.google.cloud.bigtable.admin.v2.BigtableTableAdminClient; +import com.google.cloud.bigtable.admin.v2.models.ColumnFamily; +import com.google.cloud.bigtable.admin.v2.models.CreateTableRequest; +import com.google.cloud.bigtable.admin.v2.models.ModifyColumnFamiliesRequest; +import com.google.cloud.bigtable.admin.v2.models.Table; +import com.google.cloud.kafka.connect.bigtable.exception.BatchException; +import com.google.cloud.kafka.connect.bigtable.mapping.MutationData; +import com.google.common.annotations.VisibleForTesting; +import java.util.AbstractMap; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.ExecutionException; +import java.util.stream.Collectors; +import org.apache.kafka.connect.sink.SinkRecord; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * A class responsible for the creation of Cloud Bigtable {@link Table Table(s)} and {@link + * ColumnFamily ColumnFamily(s)} needed by the transformed Kafka Connect records. + * + *
<p>This class contains nontrivial logic since we try to avoid API calls if possible.
+ *
+ * <p>This class does not automatically rediscover deleted resources. If another user of the Cloud
+ * Bigtable instance deletes a table or a column family, the sink using an instance of this class
+ * to auto-create the resources might end up sending requests targeting nonexistent {@link Table
+ * Table(s)} and/or {@link ColumnFamily ColumnFamily(s)}.
+ */
+public class BigtableSchemaManager {
+ @VisibleForTesting protected Logger logger = LoggerFactory.getLogger(BigtableSchemaManager.class);
+
+ private final BigtableTableAdminClient bigtable;
+
+ /**
+ * A {@link Map} storing the names of existing Cloud Bigtable tables as keys and existing column
+ * families within these tables as the values.
+ *
+ * <p>We have a single data structure for table and column family caches to ensure that they are
+ * consistent.
+ * An {@link Optional#empty()} value means that a table exists, but we don't know what column + * families it contains. + */ + @VisibleForTesting protected Map>> tableNameToColumnFamilies; + + /** + * The default constructor. + * + * @param bigtable The Cloud Bigtable admin client used to auto-create {@link Table Table(s)} and + * {@link ColumnFamily ColumnFamily(s)}. + */ + public BigtableSchemaManager(BigtableTableAdminClient bigtable) { + this.bigtable = bigtable; + tableNameToColumnFamilies = new HashMap<>(); + } + + /** + * Ensures that all the {@link Table Table(s)} needed by the input records exist by attempting to + * create the missing ones. + * + * @param recordsAndOutputs A {@link Map} containing {@link SinkRecord SinkRecord(s)} and their + * matching {@link MutationData} specifying which {@link Table Table(s)} need to exist. + * @return A {@link ResourceCreationResult} containing {@link SinkRecord SinkRecord(s)} for whose + * {@link MutationData} auto-creation of {@link Table Table(s)} failed. + */ + public ResourceCreationResult ensureTablesExist(Map recordsAndOutputs) { + Map> recordsByTableNames = getTableNamesToRecords(recordsAndOutputs); + + Map> recordsByMissingTableNames = + missingTablesToRecords(recordsByTableNames); + if (recordsByMissingTableNames.isEmpty()) { + return ResourceCreationResult.empty(); + } + logger.debug("Missing {} tables", recordsByMissingTableNames.size()); + Map, ResourceAndRecords> recordsByCreateTableFutures = + sendCreateTableRequests(recordsByMissingTableNames); + // No cache update here since we create tables with no column families, so every (non-delete) + // write to the table will need to create needed column families first, so saving the data from + // the response gives us no benefit. + // We ignore errors to handle races between multiple tasks of a single connector and refresh + // the cache in a further step. + Set dataErrors = + awaitResourceCreationAndHandleInvalidInputErrors( + recordsByCreateTableFutures, "Error creating a Cloud Bigtable table: %s"); + refreshTableNamesCache(); + Set bigtableErrors = + missingTablesToRecords(recordsByMissingTableNames).values().stream() + .flatMap(Collection::stream) + .collect(Collectors.toSet()); + bigtableErrors.removeAll(dataErrors); + return new ResourceCreationResult(bigtableErrors, dataErrors); + } + + /** + * Ensures that all the {@link ColumnFamily ColumnFamily(s)} needed by the input records exist by + * attempting to create the missing ones. + * + *
This method will not try to create missing {@link Table Table(s)} tables if some of the + * needed ones do not exist, but it will handle that case gracefully. + * + * @param recordsAndOutputs A {@link Map} containing {@link SinkRecord SinkRecord(s)} and their + * matching {@link MutationData} specifying which {@link ColumnFamily ColumnFamily(s)} need to + * exist. + * @return A {@link ResourceCreationResult} containing {@link SinkRecord SinkRecord(s)} for whose + * {@link MutationData} needed {@link Table Table(s)} are missing or auto-creation of {@link + * ColumnFamily ColumnFamily(s)} failed. + */ + public ResourceCreationResult ensureColumnFamiliesExist( + Map recordsAndOutputs) { + Map, List> recordsByColumnFamilies = + getTableColumnFamiliesToRecords(recordsAndOutputs); + + Map, List> recordsByMissingColumnFamilies = + missingTableColumnFamiliesToRecords(recordsByColumnFamilies); + if (recordsByMissingColumnFamilies.isEmpty()) { + return ResourceCreationResult.empty(); + } + logger.debug("Missing {} column families", recordsByMissingColumnFamilies.size()); + Map, ResourceAndRecords>> + recordsByCreateColumnFamilyFutures = + sendCreateColumnFamilyRequests(recordsByMissingColumnFamilies); + + // No cache update here since the requests are handled by Cloud Bigtable in a random order. + // We ignore errors to handle races between multiple tasks of a single connector + // and refresh the cache in a further step. + Set dataErrors = + awaitResourceCreationAndHandleInvalidInputErrors( + recordsByCreateColumnFamilyFutures, "Error creating a Cloud Bigtable column family %s"); + + Set tablesRequiringRefresh = + recordsByMissingColumnFamilies.keySet().stream() + .map(Map.Entry::getKey) + .collect(Collectors.toSet()); + refreshTableColumnFamiliesCache(tablesRequiringRefresh); + + Map, List> missing = + missingTableColumnFamiliesToRecords(recordsByMissingColumnFamilies); + Set bigtableErrors = + missing.values().stream().flatMap(Collection::stream).collect(Collectors.toSet()); + bigtableErrors.removeAll(dataErrors); + return new ResourceCreationResult(bigtableErrors, dataErrors); + } + + /** + * @param recordsAndOutputs A {@link Map} containing {@link SinkRecord SinkRecords} and + * corresponding Cloud Bigtable mutations. + * @return A {@link Map} containing Cloud Bigtable table names and {@link SinkRecord SinkRecords} + * that need these tables to exist. + */ + private static Map> getTableNamesToRecords( + Map recordsAndOutputs) { + Map> tableNamesToRecords = new HashMap<>(); + for (Map.Entry rowEntry : recordsAndOutputs.entrySet()) { + SinkRecord record = rowEntry.getKey(); + String tableName = rowEntry.getValue().getTargetTable(); + List records = + tableNamesToRecords.computeIfAbsent(tableName, k -> new ArrayList<>()); + records.add(record); + } + return tableNamesToRecords; + } + + /** + * @param recordsAndOutputs A {@link Map} containing {@link SinkRecord SinkRecords} and + * corresponding Cloud Bigtable mutations. + * @return A {@link Map} containing {@link Map.Entry Map.Entry(s)} consisting of Bigtable table + * names and column families and {@link SinkRecord SinkRecords} that need to use these tables + * and column families to exist. 
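+ * <p>A purely illustrative example (hypothetical names): a record whose mutation targets table
+ * {@code orders} and needs column families {@code stats} and {@code meta} contributes the keys
+ * {@code ("orders", "stats")} and {@code ("orders", "meta")}, each mapped to a list containing
+ * that record.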
+ */ + private static Map, List> getTableColumnFamiliesToRecords( + Map recordsAndOutputs) { + Map, List> tableColumnFamiliesToRecords = new HashMap<>(); + for (Map.Entry e : recordsAndOutputs.entrySet()) { + SinkRecord record = e.getKey(); + MutationData recordMutationData = e.getValue(); + String tableName = recordMutationData.getTargetTable(); + for (String columnFamily : recordMutationData.getRequiredColumnFamilies()) { + Map.Entry key = + new AbstractMap.SimpleImmutableEntry<>(tableName, columnFamily); + List records = + tableColumnFamiliesToRecords.computeIfAbsent(key, k -> new ArrayList<>()); + records.add(record); + } + } + return tableColumnFamiliesToRecords; + } + + /** + * Refreshes the existing table names in the cache. + * + *
Note that it deletes the entries from the cache if the tables disappear. + */ + @VisibleForTesting + void refreshTableNamesCache() { + Set tables; + try { + tables = new HashSet<>(bigtable.listTables()); + } catch (ApiException e) { + logger.error("listTables() exception", e); + // We don't allow listTables() to fail. It means something is seriously wrong, so we fail the + // whole batch. + throw new BatchException(e); + } + for (String key : new HashSet<>(tableNameToColumnFamilies.keySet())) { + if (!tables.contains(key)) { + tableNameToColumnFamilies.remove(key); + } + } + for (String table : tables) { + tableNameToColumnFamilies.putIfAbsent(table, Optional.empty()); + } + } + + /** + * Refreshes existing table names and a subset of existing column families in the cache. + * + *
Note that it deletes the entries from the cache if the tables disappeared and that it + * doesn't modify column family caches of tables that aren't provided as an argument. + * + * @param tablesRequiringRefresh A {@link Set} of table names whose column family caches will be + * refreshed. + */ + @VisibleForTesting + void refreshTableColumnFamiliesCache(Set tablesRequiringRefresh) { + refreshTableNamesCache(); + List>> tableFutures = + tableNameToColumnFamilies.keySet().stream() + .filter(tablesRequiringRefresh::contains) + .map(t -> new AbstractMap.SimpleImmutableEntry<>(t, bigtable.getTableAsync(t))) + .collect(Collectors.toList()); + Map>> newCache = new HashMap<>(tableNameToColumnFamilies); + for (Map.Entry> entry : tableFutures) { + String tableName = entry.getKey(); + try { + Table tableDetails = entry.getValue().get(); + Set tableColumnFamilies = + tableDetails.getColumnFamilies().stream() + .map(ColumnFamily::getId) + .collect(Collectors.toSet()); + newCache.put(tableName, Optional.of(tableColumnFamilies)); + } catch (ExecutionException | InterruptedException e) { + // We don't allow getTable() to fail. If it does, the entry is removed from the cache. This + // way its SinkRecord will be failed by ensureColumnFamiliesExist(). The alternative is to + // throw an exception and fail the whole batch that way. + logger.warn("getTable({}) exception", tableName, e); + newCache.remove(tableName); + } + } + // Note that we update the cache atomically to avoid partial errors. If an unexpected exception + // is thrown, the whole batch is failed. It's not ideal, but in line with the behavior of other + // connectors. + tableNameToColumnFamilies = newCache; + } + + /** + * @param tableNamesToRecords A {@link Map} containing Cloud Bigtable table names and {@link + * SinkRecord SinkRecords} that need these tables to exist. + * @return A subset of the input argument with the entries corresponding to existing tables + * removed. + */ + private Map> missingTablesToRecords( + Map> tableNamesToRecords) { + Map> recordsByMissingTableNames = new HashMap<>(tableNamesToRecords); + recordsByMissingTableNames.keySet().removeAll(tableNameToColumnFamilies.keySet()); + return recordsByMissingTableNames; + } + + /** + * @param tableColumnFamiliesToRecords A {@link Map} containing {@link Map.Entry} consisting of + * Bigtable table names and column families and {@link SinkRecord SinkRecords} that need to + * use these tables and column families to exist. + * @return A subset of the input argument with the entries corresponding to existing column + * families removed. 
+ */ + private Map, List> missingTableColumnFamiliesToRecords( + Map, List> tableColumnFamiliesToRecords) { + Map, List> recordsByMissingColumnFamilies = + new HashMap<>(tableColumnFamiliesToRecords); + for (Map.Entry>> existingEntry : + tableNameToColumnFamilies.entrySet()) { + String tableName = existingEntry.getKey(); + for (String columnFamily : existingEntry.getValue().orElse(new HashSet<>())) { + recordsByMissingColumnFamilies.remove( + new AbstractMap.SimpleImmutableEntry<>(tableName, columnFamily)); + } + } + return recordsByMissingColumnFamilies; + } + + private Map, ResourceAndRecords> sendCreateTableRequests( + Map> recordsByMissingTables) { + Map, ResourceAndRecords> result = new HashMap<>(); + for (Map.Entry> e : recordsByMissingTables.entrySet()) { + ResourceAndRecords resourceAndRecords = + new ResourceAndRecords<>(e.getKey(), e.getValue()); + result.put(createTable(e.getKey()), resourceAndRecords); + } + return result; + } + + private Map, ResourceAndRecords>> + sendCreateColumnFamilyRequests( + Map, List> recordsByMissingColumnFamilies) { + Map, ResourceAndRecords>> result = new HashMap<>(); + for (Map.Entry, List> e : + recordsByMissingColumnFamilies.entrySet()) { + ResourceAndRecords> resourceAndRecords = + new ResourceAndRecords<>(e.getKey(), e.getValue()); + result.put(createColumnFamily(e.getKey()), resourceAndRecords); + } + return result; + } + + private ApiFuture createTable(String tableName) { + logger.info("Creating table '{}'", tableName); + CreateTableRequest createTableRequest = CreateTableRequest.of(tableName); + return bigtable.createTableAsync(createTableRequest); + } + + // We only issue one request at a time because each multi-column-family operation on a single + // Table is atomic and fails if any of the Column Families to be created already exists. + // Thus by sending multiple requests, we simplify error handling when races between multiple + // tasks of a single connector happen. + private ApiFuture
createColumnFamily(Map.Entry tableNameAndColumnFamily) { + String tableName = tableNameAndColumnFamily.getKey(); + String columnFamily = tableNameAndColumnFamily.getValue(); + logger.info("Creating column family '{}' in table '{}'", columnFamily, tableName); + ModifyColumnFamiliesRequest request = + ModifyColumnFamiliesRequest.of(tableName).addFamily(columnFamily); + return bigtable.modifyFamiliesAsync(request); + } + + /** + * Awaits resource auto-creation result futures and handles the errors. + * + *
<p>The errors might be handled in two ways:
+ *
+ * <ul>
+ *   <li>If a resource's creation failed with an exception signifying that the request was
+ *       invalid, it is assumed that input {@link SinkRecord SinkRecord(s)} map to invalid values,
+ *       so all the {@link SinkRecord SinkRecord(s)} needing the resource whose creation failed
+ *       are returned.
+ *   <li>Other resource creation errors are logged.
+ * </ul>
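+ *
+ * <p>A purely illustrative example: a table creation that fails with {@code INVALID_ARGUMENT}
+ * (for instance, because of an illegal table name) causes the affected records to be returned as
+ * data errors, while a transient failure such as {@code UNAVAILABLE} is only logged by this
+ * method.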
+ * + * @param createdColumnFamilyFuturesAndRecords {@link Map} of {@link ApiFuture ApiFuture(s)} and + * information what resource is created and for which {@link SinkRecord SinkRecord(s)}. + * @param errorMessageTemplate The Java format string template of error message with which Cloud + * Bigtable exceptions for valid input data are logged. + * @return A {@link Set} of {@link SinkRecord SinkRecord(s)} for which auto resource creation + * failed due to their invalid data. + * @param {@link ApiFuture} containing result of the resource creation operation. + * @param The resources' type identifier. + */ + @VisibleForTesting + , Id> Set awaitResourceCreationAndHandleInvalidInputErrors( + Map> createdColumnFamilyFuturesAndRecords, + String errorMessageTemplate) { + Set dataErrors = new HashSet<>(); + createdColumnFamilyFuturesAndRecords.forEach( + (fut, resourceAndRecords) -> { + Object resource = resourceAndRecords.getResource(); + List sinkRecords = resourceAndRecords.getRecords(); + try { + fut.get(); + } catch (ExecutionException | InterruptedException e) { + String errorMessage = String.format(errorMessageTemplate, resource.toString()); + if (SchemaApiExceptions.isCausedByInputError(e)) { + dataErrors.addAll(sinkRecords); + } else { + logger.info(errorMessage, e); + } + } + }); + return dataErrors; + } + + /** + * A record class connecting an auto-created resource and {@link SinkRecord SinkRecord(s)} + * requiring it to exist. + * + * @param The resources' type identifier. + */ + @VisibleForTesting + static class ResourceAndRecords { + private final Id resource; + private final List records; + + public ResourceAndRecords(Id resource, List records) { + this.resource = resource; + this.records = records; + } + + public Id getResource() { + return resource; + } + + public List getRecords() { + return records; + } + } + + /** + * A helper class containing logic for grouping {@link ApiException ApiException(s)} encountered + * when modifying Cloud Bigtable schema. + */ + @VisibleForTesting + static class SchemaApiExceptions { + /** + * @param t Exception thrown by some function using Cloud Bigtable API. + * @return true if input exception was caused by invalid Cloud Bigtable request, false + * otherwise. 
+ */ + @VisibleForTesting + static boolean isCausedByInputError(Throwable t) { + return maybeExtractBigtableStatusCode(t) + .map(sc -> isStatusCodeCausedByInputError(sc.getCode())) + .orElse(false); + } + + @VisibleForTesting + static Optional maybeExtractBigtableStatusCode(Throwable t) { + while (t != null) { + if (t instanceof ApiException) { + ApiException apiException = (ApiException) t; + return Optional.of(apiException.getStatusCode()); + } + t = t.getCause(); + } + return Optional.empty(); + } + + @VisibleForTesting + static boolean isStatusCodeCausedByInputError(StatusCode.Code code) { + switch (code) { + case INVALID_ARGUMENT: + case FAILED_PRECONDITION: + case OUT_OF_RANGE: + return true; + default: + return false; + } + } + } +} diff --git a/google-cloud-bigtable-kafka-connect-sink/src/main/java/com/google/cloud/kafka/connect/bigtable/autocreate/ResourceCreationResult.java b/google-cloud-bigtable-kafka-connect-sink/src/main/java/com/google/cloud/kafka/connect/bigtable/autocreate/ResourceCreationResult.java new file mode 100644 index 000000000..f4de7efca --- /dev/null +++ b/google-cloud-bigtable-kafka-connect-sink/src/main/java/com/google/cloud/kafka/connect/bigtable/autocreate/ResourceCreationResult.java @@ -0,0 +1,51 @@ +/* + * Copyright 2025 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.google.cloud.kafka.connect.bigtable.autocreate; + +import java.util.HashSet; +import java.util.Set; +import org.apache.kafka.connect.sink.SinkRecord; + +/** A record class storing the output of {@link BigtableSchemaManager} operations. */ +public class ResourceCreationResult { + private final Set bigtableErrors; + private final Set dataErrors; + + public static ResourceCreationResult empty() { + return new ResourceCreationResult(new HashSet<>(), new HashSet<>()); + } + + public ResourceCreationResult(Set bigtableErrors, Set dataErrors) { + this.bigtableErrors = bigtableErrors; + this.dataErrors = dataErrors; + } + + /** + * @return A {@link Set} of {@link SinkRecord SinkRecord(s)} for which resource auto-creation + * failed due to some problems on Cloud Bigtable part. + */ + public Set getBigtableErrors() { + return bigtableErrors; + } + + /** + * @return A {@link Set} of {@link SinkRecord SinkRecord(s)} for which resource auto-creation + * failed due to invalid input data. These records should not ever be retried. 
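As context for how the two sets differ in handling, here is a minimal sketch, not taken from the connector itself, of what a sink task could do with a ResourceCreationResult; the method name and the use of Kafka Connect's errant record reporter are assumptions:

import com.google.cloud.kafka.connect.bigtable.autocreate.ResourceCreationResult;
import com.google.cloud.kafka.connect.bigtable.exception.InvalidBigtableSchemaModificationException;
import org.apache.kafka.connect.errors.RetriableException;
import org.apache.kafka.connect.sink.ErrantRecordReporter;
import org.apache.kafka.connect.sink.SinkRecord;

class ResourceCreationResultHandling {
  // Sketch: data errors are reported to the DLQ and never retried, while
  // Bigtable-side errors are rethrown as retriable so Kafka Connect redelivers them.
  static void handle(ResourceCreationResult result, ErrantRecordReporter reporter) {
    for (SinkRecord record : result.getDataErrors()) {
      if (reporter != null) { // null when no DLQ is configured
        reporter.report(
            record,
            new InvalidBigtableSchemaModificationException(
                "Record requires an invalid Cloud Bigtable schema modification."));
      }
    }
    if (!result.getBigtableErrors().isEmpty()) {
      throw new RetriableException(
          "Cloud Bigtable resource auto-creation failed for "
              + result.getBigtableErrors().size()
              + " record(s).");
    }
  }
}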
+ */ + public Set getDataErrors() { + return dataErrors; + } +} diff --git a/google-cloud-bigtable-kafka-connect-sink/src/main/java/com/google/cloud/kafka/connect/bigtable/config/BigtableErrorMode.java b/google-cloud-bigtable-kafka-connect-sink/src/main/java/com/google/cloud/kafka/connect/bigtable/config/BigtableErrorMode.java new file mode 100644 index 000000000..f8447c854 --- /dev/null +++ b/google-cloud-bigtable-kafka-connect-sink/src/main/java/com/google/cloud/kafka/connect/bigtable/config/BigtableErrorMode.java @@ -0,0 +1,22 @@ +/* + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.google.cloud.kafka.connect.bigtable.config; + +public enum BigtableErrorMode { + FAIL, + WARN, + IGNORE, +} diff --git a/google-cloud-bigtable-kafka-connect-sink/src/main/java/com/google/cloud/kafka/connect/bigtable/config/BigtableSinkConfig.java b/google-cloud-bigtable-kafka-connect-sink/src/main/java/com/google/cloud/kafka/connect/bigtable/config/BigtableSinkConfig.java new file mode 100644 index 000000000..0fe27aff4 --- /dev/null +++ b/google-cloud-bigtable-kafka-connect-sink/src/main/java/com/google/cloud/kafka/connect/bigtable/config/BigtableSinkConfig.java @@ -0,0 +1,548 @@ +/* + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.google.cloud.kafka.connect.bigtable.config; + +import com.google.api.gax.core.CredentialsProvider; +import com.google.api.gax.core.FixedCredentialsProvider; +import com.google.api.gax.retrying.RetrySettings; +import com.google.auth.oauth2.GoogleCredentials; +import com.google.cloud.bigtable.admin.v2.BigtableTableAdminClient; +import com.google.cloud.bigtable.admin.v2.BigtableTableAdminSettings; +import com.google.cloud.bigtable.admin.v2.stub.BigtableTableAdminStubSettings; +import com.google.cloud.bigtable.data.v2.BigtableDataClient; +import com.google.cloud.bigtable.data.v2.BigtableDataSettings; +import com.google.cloud.bigtable.data.v2.stub.EnhancedBigtableStubSettings; +import com.google.common.annotations.VisibleForTesting; +import java.io.ByteArrayInputStream; +import java.io.FileInputStream; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.function.Function; +import org.apache.kafka.common.config.AbstractConfig; +import org.apache.kafka.common.config.Config; +import org.apache.kafka.common.config.ConfigDef; +import org.apache.kafka.common.config.ConfigException; +import org.apache.kafka.common.config.ConfigValue; +import org.apache.kafka.common.utils.Utils; +import org.apache.kafka.connect.errors.RetriableException; +import org.threeten.bp.Duration; +import org.threeten.bp.temporal.ChronoUnit; + +/** + * A class defining the configuration of {@link + * com.google.cloud.kafka.connect.bigtable.BigtableSinkConnector}. + * + *

It's responsible for the validation and parsing of the user-provided values. + */ +public class BigtableSinkConfig extends AbstractConfig { + public static final String CONFIG_GCP_PROJECT_ID = "gcp.bigtable.project.id"; + public static final String CONFIG_GCP_CREDENTIALS_PATH = "gcp.bigtable.credentials.path"; + public static final String CONFIG_GCP_CREDENTIALS_JSON = "gcp.bigtable.credentials.json"; + public static final String CONFIG_BIGTABLE_INSTANCE_ID = "gcp.bigtable.instance.id"; + public static final String CONFIG_BIGTABLE_APP_PROFILE_ID = "gcp.bigtable.app.profile.id"; + public static final String CONFIG_INSERT_MODE = "insert.mode"; + public static final String CONFIG_MAX_BATCH_SIZE = "max.batch.size"; + public static final String CONFIG_VALUE_NULL_MODE = "value.null.mode"; + public static final String CONFIG_ERROR_MODE = "error.mode"; + public static final String CONFIG_TABLE_NAME_FORMAT = "table.name.format"; + public static final String CONFIG_ROW_KEY_DEFINITION = "row.key.definition"; + public static final String CONFIG_ROW_KEY_DELIMITER = "row.key.delimiter"; + public static final String CONFIG_AUTO_CREATE_TABLES = "auto.create.tables"; + public static final String CONFIG_AUTO_CREATE_COLUMN_FAMILIES = "auto.create.column.families"; + public static final String CONFIG_DEFAULT_COLUMN_FAMILY = "default.column.family"; + public static final String CONFIG_DEFAULT_COLUMN_QUALIFIER = "default.column.qualifier"; + public static final String CONFIG_RETRY_TIMEOUT_MILLIS = "retry.timeout.ms"; + private static final InsertMode DEFAULT_INSERT_MODE = InsertMode.INSERT; + private static final NullValueMode DEFAULT_NULL_VALUE_MODE = NullValueMode.WRITE; + private static final BigtableErrorMode DEFAULT_ERROR_MODE = BigtableErrorMode.FAIL; + private static final Integer DEFAULT_MAX_BATCH_SIZE = 1; + private static final List BIGTABLE_CONFIGURATION_PROPERTIES = + List.of( + CONFIG_GCP_CREDENTIALS_JSON, + CONFIG_GCP_CREDENTIALS_PATH, + CONFIG_GCP_PROJECT_ID, + CONFIG_BIGTABLE_INSTANCE_ID, + CONFIG_BIGTABLE_APP_PROFILE_ID); + private static final int BIGTABLE_CREDENTIALS_CHECK_TIMEOUT_SECONDS = 2; + + protected BigtableSinkConfig(ConfigDef definition, Map properties) { + super(definition, properties); + } + + /** + * The main constructor. + * + * @param properties The properties provided by the user. + */ + public BigtableSinkConfig(Map properties) { + this(getDefinition(), properties); + } + + /** + * Validates that a valid {@link BigtableSinkConfig} can be created using the input properties. + * + * @param props The properties provided by the user. + * @return {@link Config} containing validation results. + */ + public static Config validate(Map props) { + return validate(props, true); + } + + /** + * Validates that a valid {@link BigtableSinkConfig} can be created using the input properties. + * + * @param props The properties provided by the user. + * @param accessBigtableToValidateConfiguration If set to true, validation includes checking + * whether the Cloud Bigtable configuration is valid by connecting to Cloud Bigtable and + * attempting to execute a simple read-only operation. + * @return {@link Config} containing validation results. + */ + @VisibleForTesting + static Config validate(Map props, boolean accessBigtableToValidateConfiguration) { + // Note that we only need to verify the properties we define, the generic Sink configuration is + // handled in SinkConnectorConfig::validate(). 
+ String credentialsPath = props.get(CONFIG_GCP_CREDENTIALS_PATH); + String credentialsJson = props.get(CONFIG_GCP_CREDENTIALS_JSON); + String insertMode = props.get(CONFIG_INSERT_MODE); + String maxBatchSize = props.get(CONFIG_MAX_BATCH_SIZE); + String effectiveInsertMode = + Optional.ofNullable(insertMode).orElse(DEFAULT_INSERT_MODE.name()).toUpperCase(); + String effectiveMaxBatchSize = + Optional.ofNullable(maxBatchSize).orElse(DEFAULT_MAX_BATCH_SIZE.toString()).trim(); + + Map validationResult = getDefinition().validateAll(props); + if (!Utils.isBlank(credentialsPath) && !Utils.isBlank(credentialsJson)) { + String errorMessage = + CONFIG_GCP_CREDENTIALS_JSON + + " and " + + CONFIG_GCP_CREDENTIALS_PATH + + " are mutually exclusive options, but both are set."; + addErrorMessage(validationResult, CONFIG_GCP_CREDENTIALS_JSON, credentialsJson, errorMessage); + addErrorMessage(validationResult, CONFIG_GCP_CREDENTIALS_PATH, credentialsPath, errorMessage); + } + if (effectiveInsertMode.equals(InsertMode.INSERT.name()) + && !effectiveMaxBatchSize.equals("1")) { + String errorMessage = + "When using `" + + CONFIG_INSERT_MODE + + "` of `insert`, " + + CONFIG_MAX_BATCH_SIZE + + " must be set to `1`."; + addErrorMessage(validationResult, CONFIG_INSERT_MODE, insertMode, errorMessage); + addErrorMessage(validationResult, CONFIG_MAX_BATCH_SIZE, maxBatchSize, errorMessage); + } + + if (accessBigtableToValidateConfiguration + && validationResult.values().stream().allMatch(v -> v.errorMessages().isEmpty())) { + // We validate the user's credentials in order to warn them early rather than fill DLQ + // with records whose processing would fail due to invalid credentials. + // We only call it after validating that all other parameters are fine since creating + // a Cloud Bigtable client uses many of these parameters, and we don't want to warn + // the user unnecessarily. + BigtableSinkConfig config = new BigtableSinkConfig(props); + if (!config.isBigtableConfigurationValid()) { + String errorMessage = "Cloud Bigtable configuration is invalid."; + for (String bigtableProp : BIGTABLE_CONFIGURATION_PROPERTIES) { + addErrorMessage(validationResult, bigtableProp, props.get(bigtableProp), errorMessage); + } + } + } + return new Config(new ArrayList<>(validationResult.values())); + } + + /** + * @return {@link ConfigDef} used by Kafka Connect to advertise configuration options to the user + * and by us to perform basic validation of the user-provided values. + */ + public static ConfigDef getDefinition() { + return new ConfigDef() + .define( + CONFIG_GCP_PROJECT_ID, + ConfigDef.Type.STRING, + ConfigDef.NO_DEFAULT_VALUE, + ConfigDef.CompositeValidator.of( + new ConfigDef.NonNullValidator(), new ConfigDef.NonEmptyString()), + ConfigDef.Importance.HIGH, + "The ID of the GCP project.") + .define( + CONFIG_BIGTABLE_INSTANCE_ID, + ConfigDef.Type.STRING, + ConfigDef.NO_DEFAULT_VALUE, + ConfigDef.CompositeValidator.of( + new ConfigDef.NonNullValidator(), new ConfigDef.NonEmptyString()), + ConfigDef.Importance.HIGH, + "The ID of the Cloud Bigtable instance.") + .define( + CONFIG_BIGTABLE_APP_PROFILE_ID, + ConfigDef.Type.STRING, + null, + ConfigDef.Importance.MEDIUM, + "The application profile that the connector should use. If none is supplied," + + " the default app profile will be used.") + .define( + CONFIG_GCP_CREDENTIALS_PATH, + ConfigDef.Type.STRING, + null, + ConfigDef.Importance.HIGH, + "The path to the JSON service key file. 
Configure at most one of `" + + CONFIG_GCP_CREDENTIALS_PATH + + "` and `" + + CONFIG_GCP_CREDENTIALS_JSON + + "`. If neither is provided, Application Default Credentials will be used.") + .define( + CONFIG_GCP_CREDENTIALS_JSON, + ConfigDef.Type.STRING, + null, + ConfigDef.Importance.HIGH, + "The path to the JSON service key file. Configure at most one of `" + + CONFIG_GCP_CREDENTIALS_PATH + + "` and `" + + CONFIG_GCP_CREDENTIALS_JSON + + "`. If neither is provided, Application Default Credentials will be used.") + .define( + CONFIG_INSERT_MODE, + ConfigDef.Type.STRING, + DEFAULT_INSERT_MODE.name(), + enumValidator(InsertMode.values()), + ConfigDef.Importance.HIGH, + "Defines the insertion mode to use. Supported modes are:" + + "\n- insert - Insert new record only." + + " If the row to be written already exists in the table, an error is thrown." + + "\n- upsert - If the row to be written already exists," + + " then its column values are overwritten with the ones provided.") + .define( + CONFIG_MAX_BATCH_SIZE, + ConfigDef.Type.INT, + DEFAULT_MAX_BATCH_SIZE, + ConfigDef.Range.atLeast(1), + ConfigDef.Importance.MEDIUM, + "The maximum number of records that can be batched into a batch of upserts." + + " Note that since only a batch size of 1 for inserts is supported, `" + + CONFIG_MAX_BATCH_SIZE + + "` must be exactly `1` when `" + + CONFIG_INSERT_MODE + + "` is set to `INSERT`.") + .define( + CONFIG_VALUE_NULL_MODE, + ConfigDef.Type.STRING, + DEFAULT_NULL_VALUE_MODE.name(), + enumValidator(NullValueMode.values()), + ConfigDef.Importance.MEDIUM, + "Defines what to do with `null` Kafka values. Supported modes are:" + + "\n- write - Serialize `null`s to empty byte arrays." + + "\n- ignore - Ignore `null`s." + + "\n- delete - Use them to issue DELETE commands. Root-level `null` deletes a" + + " row. `null` nested one level deletes a column family named after the" + + " `null`-valued field. `null` nested two levels deletes a column named after the" + + " `null`-valued field in column family named after the `null-valued` field parent" + + " field. `null` values nested more than two levels are serialized like other" + + " values and don't result in any DELETE commands.") + .define( + CONFIG_ERROR_MODE, + ConfigDef.Type.STRING, + DEFAULT_ERROR_MODE.name(), + enumValidator(BigtableErrorMode.values()), + ConfigDef.Importance.MEDIUM, + "Specifies how to handle errors that result from writes, after retries. It is ignored" + + " if DLQ is configured. Supported modes are:" + + "\n- fail - The connector fails and must be manually restarted." + + "\n- warn - The connector logs a warning and continues operating normally." + + "\n- ignore - The connector does not log a warning but continues operating" + + " normally.") + .define( + CONFIG_TABLE_NAME_FORMAT, + ConfigDef.Type.STRING, + "${topic}", + ConfigDef.CompositeValidator.of( + new ConfigDef.NonNullValidator(), new ConfigDef.NonEmptyString()), + ConfigDef.Importance.MEDIUM, + "Name of the destination table. Use `${topic}` within the table name to specify" + + " the originating topic name.\nFor example, `user_${topic}` for the topic `stats`" + + " will map to the table name `user_stats`.") + .define( + CONFIG_ROW_KEY_DEFINITION, + ConfigDef.Type.LIST, + "", + ConfigDef.Importance.MEDIUM, + "A comma separated list of Kafka Record key field names that specifies the order of" + + " Kafka key fields to be concatenated to form the row key." 
+ + "\nFor example the list: `username, post_id, time_stamp` when applied to a Kafka" + + " key: `{'username': 'bob','post_id': '213', 'time_stamp': '123123'}` and with" + + " delimiter `#` gives the row key `bob#213#123123`. You can also access terms" + + " nested in the key by using `.` as a delimiter. If this configuration is empty" + + " or unspecified and the Kafka Message Key is a" + + "\n- struct, all the fields in the struct are used to construct the row key." + + "\n- byte array, the row key is set to the byte array as is." + + "\n- primitive, the row key is set to the primitive stringified." + + "If prefixes, more complicated delimiters, and string constants are required in" + + " your Row Key, consider configuring an SMT to add relevant fields to the Kafka" + + " Record key.") + .define( + CONFIG_ROW_KEY_DELIMITER, + ConfigDef.Type.STRING, + "", + ConfigDef.Importance.LOW, + "The delimiter used in concatenating Kafka key fields in the row key. If this" + + " configuration is empty or unspecified, the key fields will be concatenated" + + " together directly.") + .define( + CONFIG_AUTO_CREATE_TABLES, + ConfigDef.Type.BOOLEAN, + false, + new ConfigDef.NonNullValidator(), + ConfigDef.Importance.MEDIUM, + "Whether to automatically create the destination table if it is found to be missing." + + "\nWhen enabled, the records for which the auto-creation fails, are failed." + + "\nRecreation of tables deleted by other Cloud Bigtable users is not supported.") + .define( + CONFIG_AUTO_CREATE_COLUMN_FAMILIES, + ConfigDef.Type.BOOLEAN, + false, + new ConfigDef.NonNullValidator(), + ConfigDef.Importance.MEDIUM, + "Whether to automatically create missing columns families in the table relative to the" + + " record schema." + + "\nDoes not imply auto-creation of tables." + + "\nWhen enabled, the records for which the auto-creation fails, are failed." + + "\nRecreation of column families deleted by other Cloud Bigtable users is not" + + " supported.") + .define( + CONFIG_DEFAULT_COLUMN_FAMILY, + ConfigDef.Type.STRING, + "default", + ConfigDef.Importance.MEDIUM, + "Any root-level fields on the SinkRecord that aren't objects will be added to this" + + " column family. If empty, the fields will be ignored.") + .define( + CONFIG_DEFAULT_COLUMN_QUALIFIER, + ConfigDef.Type.STRING, + "KAFKA_VALUE", + ConfigDef.Importance.MEDIUM, + "Any root-level values on the SinkRecord that aren't objects will be added to this" + + " column within default column family. If empty, the value will be ignored.") + .define( + CONFIG_RETRY_TIMEOUT_MILLIS, + ConfigDef.Type.LONG, + 90000, + ConfigDef.Range.atLeast(0), + ConfigDef.Importance.MEDIUM, + "Maximum time in milliseconds allocated for retrying database operations before trying" + + " other error handling mechanisms."); + } + + /** + * Adds a validation error in the format expected by {@link BigtableSinkConfig#validate(Map)}. + * + * @param validatedConfig Input/output parameter containing current validation result. + * @param name Configuration parameter name. + * @param value Configuration parameter value. + * @param errorMessage Error message to be added. 
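To make the interplay of these options concrete, here is a small illustrative sketch of validating a configuration programmatically with the constants and validate() method defined above; the project, instance, and option values are placeholders:

import com.google.cloud.kafka.connect.bigtable.config.BigtableSinkConfig;
import java.util.Map;
import org.apache.kafka.common.config.Config;
import org.apache.kafka.common.config.ConfigValue;

class ConfigValidationSketch {
  public static void main(String[] args) {
    // Placeholder values; insert.mode=upsert permits max.batch.size > 1,
    // whereas insert.mode=insert would require max.batch.size=1.
    Map<String, String> props =
        Map.of(
            BigtableSinkConfig.CONFIG_GCP_PROJECT_ID, "my-project",
            BigtableSinkConfig.CONFIG_BIGTABLE_INSTANCE_ID, "my-instance",
            BigtableSinkConfig.CONFIG_INSERT_MODE, "upsert",
            BigtableSinkConfig.CONFIG_MAX_BATCH_SIZE, "100",
            BigtableSinkConfig.CONFIG_VALUE_NULL_MODE, "ignore",
            BigtableSinkConfig.CONFIG_AUTO_CREATE_TABLES, "true");
    // Note that validate() may additionally attempt a lightweight Cloud Bigtable
    // connectivity check once all other options pass validation.
    Config validation = BigtableSinkConfig.validate(props);
    for (ConfigValue value : validation.configValues()) {
      if (!value.errorMessages().isEmpty()) {
        System.out.println(value.name() + ": " + value.errorMessages());
      }
    }
  }
}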
+ */ + private static void addErrorMessage( + Map validatedConfig, String name, String value, String errorMessage) { + validatedConfig + .computeIfAbsent( + name, p -> new ConfigValue(name, value, Collections.emptyList(), new ArrayList<>())) + .addErrorMessage(errorMessage); + } + + public NullValueMode getNullValueMode() { + return getEnum(CONFIG_VALUE_NULL_MODE, NullValueMode::valueOf); + } + + public BigtableErrorMode getBigtableErrorMode() { + return getEnum(CONFIG_ERROR_MODE, BigtableErrorMode::valueOf); + } + + public InsertMode getInsertMode() { + return getEnum(CONFIG_INSERT_MODE, InsertMode::valueOf); + } + + /** + * @return {@link BigtableTableAdminClient} connected to a Cloud Bigtable instance configured as + * described in {@link BigtableSinkConfig#getDefinition()}. + */ + public BigtableTableAdminClient getBigtableAdminClient() { + RetrySettings retrySettings = getRetrySettings(); + return getBigtableAdminClient(retrySettings); + } + + @VisibleForTesting + BigtableTableAdminClient getBigtableAdminClient(RetrySettings retrySettings) { + Optional credentialsProvider = + getUserConfiguredBigtableCredentialsProvider(); + + BigtableTableAdminSettings.Builder adminSettingsBuilder = + BigtableTableAdminSettings.newBuilder() + .setProjectId(getString(BigtableSinkTaskConfig.CONFIG_GCP_PROJECT_ID)) + .setInstanceId(getString(BigtableSinkTaskConfig.CONFIG_BIGTABLE_INSTANCE_ID)); + if (credentialsProvider.isPresent()) { + adminSettingsBuilder.setCredentialsProvider(credentialsProvider.get()); + } else { + // Use the default credential provider that utilizes Application Default Credentials. + } + + BigtableTableAdminStubSettings.Builder adminStubSettings = adminSettingsBuilder.stubSettings(); + adminStubSettings.createTableSettings().setRetrySettings(retrySettings); + adminStubSettings.modifyColumnFamiliesSettings().setRetrySettings(retrySettings); + adminStubSettings.listTablesSettings().setRetrySettings(retrySettings); + adminStubSettings.getTableSettings().setRetrySettings(retrySettings); + try { + return BigtableTableAdminClient.create(adminSettingsBuilder.build()); + } catch (IOException e) { + throw new RetriableException(e); + } + } + + /** + * @return {@link BigtableDataClient} connected to Cloud Bigtable instance configured as described + * in {@link BigtableSinkConfig#getDefinition()}. + */ + public BigtableDataClient getBigtableDataClient() { + RetrySettings retrySettings = getRetrySettings(); + Optional credentialsProvider = + getUserConfiguredBigtableCredentialsProvider(); + + BigtableDataSettings.Builder dataSettingsBuilder = + BigtableDataSettings.newBuilder() + .setProjectId(getString(BigtableSinkTaskConfig.CONFIG_GCP_PROJECT_ID)) + .setInstanceId(getString(BigtableSinkTaskConfig.CONFIG_BIGTABLE_INSTANCE_ID)); + if (credentialsProvider.isPresent()) { + dataSettingsBuilder.setCredentialsProvider(credentialsProvider.get()); + } else { + // Use the default credential provider that utilizes Application Default Credentials. 
+ } + String appProfileId = getString(BigtableSinkTaskConfig.CONFIG_BIGTABLE_APP_PROFILE_ID); + if (appProfileId == null) { + dataSettingsBuilder.setDefaultAppProfileId(); + } else { + dataSettingsBuilder.setAppProfileId(appProfileId); + } + + EnhancedBigtableStubSettings.Builder dataStubSettings = dataSettingsBuilder.stubSettings(); + dataStubSettings.mutateRowSettings().setRetrySettings(retrySettings); + dataStubSettings.bulkMutateRowsSettings().setRetrySettings(retrySettings); + dataStubSettings.readRowSettings().setRetrySettings(retrySettings); + dataStubSettings.readRowsSettings().setRetrySettings(retrySettings); + + try { + return BigtableDataClient.create(dataSettingsBuilder.build()); + } catch (IOException e) { + throw new RetriableException(e); + } + } + + /** + * Checks whether Cloud Bigtable configuration is valid by connecting to Cloud Bigtable and + * attempting to execute a simple read-only operation. + * + * @return true if Cloud Bigtable configuration is valid, false otherwise. + */ + @VisibleForTesting + boolean isBigtableConfigurationValid() { + BigtableTableAdminClient bigtable = null; + try { + RetrySettings retrySettings = + RetrySettings.newBuilder() + .setMaxAttempts(0) + .setTotalTimeout( + Duration.of(BIGTABLE_CREDENTIALS_CHECK_TIMEOUT_SECONDS, ChronoUnit.SECONDS)) + .build(); + bigtable = getBigtableAdminClient(retrySettings); + bigtable.listTables(); + return true; + } catch (Throwable t) { + return false; + } finally { + if (bigtable != null) { + bigtable.close(); + } + } + } + + /** + * @return {@link RetrySettings} of Cloud Bigtable clients configured as described in {@link + * BigtableSinkConfig#getDefinition()}. + */ + protected RetrySettings getRetrySettings() { + return RetrySettings.newBuilder() + .setTotalTimeout( + Duration.of( + getLong(BigtableSinkTaskConfig.CONFIG_RETRY_TIMEOUT_MILLIS), ChronoUnit.MILLIS)) + .build(); + } + + /** + * Extracts typed enum value from this object. + * + * @param configName Enum parameter name in {@link BigtableSinkConfig}. + * @param converter Function that parses parameter value into an enum value. It's assumed to throw + * only {@link NullPointerException} and {@link IllegalArgumentException}. + * @return Parsed enum value. + * @param Enum type. + */ + private T getEnum(String configName, Function converter) { + String s = this.getString(configName); + try { + return converter.apply(s.toUpperCase()); + } catch (NullPointerException | IllegalArgumentException e) { + throw new ConfigException(configName, s); + } + } + + private static ConfigDef.Validator enumValidator(Enum[] enumValues) { + return ConfigDef.CaseInsensitiveValidString.in( + Arrays.stream(enumValues).map(Enum::name).toArray(String[]::new)); + } + + /** + * @return {@link Optional#empty()} if the user didn't configure the Cloud Bigtable credentials, + * {@link Optional} containing {@link CredentialsProvider} configured as described in {@link + * BigtableSinkConfig#getDefinition()} otherwise. 
+ */ + protected Optional getUserConfiguredBigtableCredentialsProvider() { + String credentialsJson = getString(BigtableSinkTaskConfig.CONFIG_GCP_CREDENTIALS_JSON); + String credentialsPath = getString(BigtableSinkTaskConfig.CONFIG_GCP_CREDENTIALS_PATH); + byte[] credentials; + if (!Utils.isBlank(credentialsJson)) { + credentials = credentialsJson.getBytes(StandardCharsets.UTF_8); + } else if (!Utils.isBlank(credentialsPath)) { + try (FileInputStream is = new FileInputStream(credentialsPath)) { + credentials = is.readAllBytes(); + } catch (IOException e) { + throw new ConfigException( + String.format("Error getting credentials from file: %s.", credentialsPath)); + } + } else { + // We will use the default CredentialsProvider, which doesn't need any application-level + // configuration. + return Optional.empty(); + } + try { + return Optional.of( + FixedCredentialsProvider.create( + GoogleCredentials.fromStream(new ByteArrayInputStream(credentials)))); + } catch (IOException e) { + throw new ConfigException("Cloud Bigtable credentials creation failed."); + } + } +} diff --git a/google-cloud-bigtable-kafka-connect-sink/src/main/java/com/google/cloud/kafka/connect/bigtable/config/BigtableSinkTaskConfig.java b/google-cloud-bigtable-kafka-connect-sink/src/main/java/com/google/cloud/kafka/connect/bigtable/config/BigtableSinkTaskConfig.java new file mode 100644 index 000000000..48129c32f --- /dev/null +++ b/google-cloud-bigtable-kafka-connect-sink/src/main/java/com/google/cloud/kafka/connect/bigtable/config/BigtableSinkTaskConfig.java @@ -0,0 +1,49 @@ +/* + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.google.cloud.kafka.connect.bigtable.config; + +import java.util.Map; +import org.apache.kafka.common.config.ConfigDef; + +/** + * A class defining configuration of {@link + * com.google.cloud.kafka.connect.bigtable.BigtableSinkTask}. + */ +public class BigtableSinkTaskConfig extends BigtableSinkConfig { + public static String CONFIG_TASK_ID = "taskId"; + + /** + * The main constructor. + * + * @param properties The properties provided by the caller. + */ + public BigtableSinkTaskConfig(Map properties) { + super(getDefinition(), properties); + } + + /** + * @return {@link ConfigDef} used by Kafka Connect to advertise configuration options to the user + * and by us to perform basic validation of the user-provided values. 
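The only addition over the connector-level definition is the internal taskId property. A sketch of how the connector presumably spreads it across tasks follows; the surrounding class and the configProperties field are assumptions, chosen to mirror what BigtableSinkConnectorTest further below asserts:

import com.google.cloud.kafka.connect.bigtable.config.BigtableSinkTaskConfig;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

class TaskConfigsSketch {
  private Map<String, String> configProperties; // assumed: captured in start()

  // Sketch: one copy of the connector properties per task, tagged with a
  // consecutive task id under BigtableSinkTaskConfig.CONFIG_TASK_ID.
  public List<Map<String, String>> taskConfigs(int maxTasks) {
    List<Map<String, String>> taskConfigs = new ArrayList<>();
    for (int i = 0; i < maxTasks; i++) {
      Map<String, String> taskConfig = new HashMap<>(configProperties);
      taskConfig.put(BigtableSinkTaskConfig.CONFIG_TASK_ID, Integer.toString(i));
      taskConfigs.add(taskConfig);
    }
    return taskConfigs;
  }
}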
+ */ + public static ConfigDef getDefinition() { + return BigtableSinkConfig.getDefinition() + .defineInternal( + CONFIG_TASK_ID, + ConfigDef.Type.INT, + ConfigDef.NO_DEFAULT_VALUE, + ConfigDef.Importance.LOW); + } +} diff --git a/google-cloud-bigtable-kafka-connect-sink/src/main/java/com/google/cloud/kafka/connect/bigtable/config/InsertMode.java b/google-cloud-bigtable-kafka-connect-sink/src/main/java/com/google/cloud/kafka/connect/bigtable/config/InsertMode.java new file mode 100644 index 000000000..a34481aa3 --- /dev/null +++ b/google-cloud-bigtable-kafka-connect-sink/src/main/java/com/google/cloud/kafka/connect/bigtable/config/InsertMode.java @@ -0,0 +1,21 @@ +/* + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.google.cloud.kafka.connect.bigtable.config; + +public enum InsertMode { + INSERT, + UPSERT, +} diff --git a/google-cloud-bigtable-kafka-connect-sink/src/main/java/com/google/cloud/kafka/connect/bigtable/config/NullValueMode.java b/google-cloud-bigtable-kafka-connect-sink/src/main/java/com/google/cloud/kafka/connect/bigtable/config/NullValueMode.java new file mode 100644 index 000000000..ad7a208f7 --- /dev/null +++ b/google-cloud-bigtable-kafka-connect-sink/src/main/java/com/google/cloud/kafka/connect/bigtable/config/NullValueMode.java @@ -0,0 +1,22 @@ +/* + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.google.cloud.kafka.connect.bigtable.config; + +public enum NullValueMode { + WRITE, + IGNORE, + DELETE, +} diff --git a/google-cloud-bigtable-kafka-connect-sink/src/main/java/com/google/cloud/kafka/connect/bigtable/exception/BatchException.java b/google-cloud-bigtable-kafka-connect-sink/src/main/java/com/google/cloud/kafka/connect/bigtable/exception/BatchException.java new file mode 100644 index 000000000..67575ac26 --- /dev/null +++ b/google-cloud-bigtable-kafka-connect-sink/src/main/java/com/google/cloud/kafka/connect/bigtable/exception/BatchException.java @@ -0,0 +1,28 @@ +/* + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.google.cloud.kafka.connect.bigtable.exception; + +import org.apache.kafka.connect.errors.ConnectException; + +/** + * A wrapper exception class that may be thrown to explicitly mark a throw as supposed to fail an + * entire batch of input records. + */ +public class BatchException extends ConnectException { + public BatchException(Throwable t) { + super(t); + } +} diff --git a/google-cloud-bigtable-kafka-connect-sink/src/main/java/com/google/cloud/kafka/connect/bigtable/exception/InvalidBigtableSchemaModificationException.java b/google-cloud-bigtable-kafka-connect-sink/src/main/java/com/google/cloud/kafka/connect/bigtable/exception/InvalidBigtableSchemaModificationException.java new file mode 100644 index 000000000..8c2fcfc5b --- /dev/null +++ b/google-cloud-bigtable-kafka-connect-sink/src/main/java/com/google/cloud/kafka/connect/bigtable/exception/InvalidBigtableSchemaModificationException.java @@ -0,0 +1,29 @@ +/* + * Copyright 2025 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.google.cloud.kafka.connect.bigtable.exception; + +import org.apache.kafka.connect.errors.DataException; + +/** + * An {@link Exception} that signifies that input {@link org.apache.kafka.connect.sink.SinkRecord + * SinkRecord(s)} cause attempt of invalid Cloud Bigtable schema modification and thus is invalid + * and should not be retried. + */ +public class InvalidBigtableSchemaModificationException extends DataException { + public InvalidBigtableSchemaModificationException(String message) { + super(message); + } +} diff --git a/google-cloud-bigtable-kafka-connect-sink/src/main/java/com/google/cloud/kafka/connect/bigtable/mapping/KeyMapper.java b/google-cloud-bigtable-kafka-connect-sink/src/main/java/com/google/cloud/kafka/connect/bigtable/mapping/KeyMapper.java new file mode 100644 index 000000000..81436dc90 --- /dev/null +++ b/google-cloud-bigtable-kafka-connect-sink/src/main/java/com/google/cloud/kafka/connect/bigtable/mapping/KeyMapper.java @@ -0,0 +1,262 @@ +/* + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.google.cloud.kafka.connect.bigtable.mapping; + +import com.google.cloud.kafka.connect.bigtable.config.BigtableSinkConfig; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; +import java.util.AbstractMap; +import java.util.Arrays; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import org.apache.kafka.connect.data.Field; +import org.apache.kafka.connect.data.Struct; +import org.apache.kafka.connect.errors.DataException; + +/** + * A class responsible for converting Kafka {@link org.apache.kafka.connect.sink.SinkRecord + * SinkRecord(s)} into Cloud Bigtable row keys. + */ +public class KeyMapper { + final List> definition; + final byte[] delimiter; + + /** + * The main constructor. + * + * @param delimiter Delimiter in the mapping as per {@link + * com.google.cloud.kafka.connect.bigtable.config.BigtableSinkConfig#CONFIG_ROW_KEY_DELIMITER} + * @param definition Definition of the mapping as per {@link + * com.google.cloud.kafka.connect.bigtable.config.BigtableSinkConfig#CONFIG_ROW_KEY_DEFINITION}. + */ + public KeyMapper(String delimiter, List definition) { + this.delimiter = delimiter.getBytes(StandardCharsets.UTF_8); + this.definition = + definition.stream() + .map(s -> s.split("\\.")) + .map(Arrays::asList) + .collect(Collectors.toList()); + } + + /** + * Converts input data into Cloud Bigtable row key bytes as described in {@link + * BigtableSinkConfig#getDefinition()}. + * + * @param kafkaKey An {@link Object} to be converted into Cloud Bigtable row key. + * @return {@link Optional#empty()} if the input doesn't convert into a valid Cloud Bigtable row + * key, {@link Optional} containing row Cloud Bigtable row key bytes the input converts into + * otherwise. + */ + public byte[] getKey(Object kafkaKey) { + ensureKeyElementIsNotNull(kafkaKey); + Stream keyParts = + this.getDefinition(kafkaKey).stream() + .map((d) -> serializeTopLevelKeyElement(extractField(kafkaKey, d.iterator()))); + return concatenateByteArrays(new byte[0], keyParts, delimiter, new byte[0]); + } + + /** + * Returns key definition as configured during object creation or extracted from the object being + * mapped if it's been configured to an empty {@link List}. + * + * @param kafkaKey {@link org.apache.kafka.connect.sink.SinkRecord SinkRecord's} key. + * @return {@link List} containing {@link List Lists} of key fields that need to be retrieved and + * concatenated to construct the Cloud Bigtable row key. + *

See {@link KeyMapper#extractField(Object, Iterator)} for details on semantics of the + * inner list. + */ + private List> getDefinition(Object kafkaKey) { + if (this.definition.isEmpty()) { + Optional> maybeRootFields = getFieldsOfRootValue(kafkaKey); + if (maybeRootFields.isEmpty()) { + List rootElementDefinition = List.of(); + return List.of(rootElementDefinition); + } else { + return maybeRootFields.get().stream() + .map(Collections::singletonList) + .collect(Collectors.toList()); + } + } + return this.definition; + } + + /** + * Extracts names of child fields of the value. + * + * @param kafkaKey {@link org.apache.kafka.connect.sink.SinkRecord SinkRecord's} key. + * @return {@link Optional#empty()} if the input value has no children, {@link Optional} + * containing names of its child fields otherwise. + */ + private static Optional> getFieldsOfRootValue(Object kafkaKey) { + if (kafkaKey instanceof Struct) { + return Optional.of( + ((Struct) kafkaKey) + .schema().fields().stream().map(Field::name).collect(Collectors.toList())); + } else if (kafkaKey instanceof Map) { + return Optional.of( + ((Map) kafkaKey) + .keySet().stream().map(Object::toString).collect(Collectors.toList())); + } else { + return Optional.empty(); + } + } + + /** + * Extract possibly nested fields from the input value. + * + * @param value {@link org.apache.kafka.connect.sink.SinkRecord SinkRecord's} key or some its + * child. + * @param fields Fields that need to be accessed before the target value is reached. + * @return Extracted nested field. + */ + private Object extractField(Object value, Iterator fields) { + ensureKeyElementIsNotNull(value); + if (!fields.hasNext()) { + return value; + } + String field = fields.next(); + if (value instanceof Struct) { + Struct struct = (Struct) value; + // Note that getWithoutDefault() throws if such a field does not exist. + return extractField(struct.getWithoutDefault(field), fields); + } else if (value instanceof Map) { + Map map = (Map) value; + if (!map.containsKey(field)) { + throw new DataException("Map contains no value for key `" + field + "`."); + } + return extractField(map.get(field), fields); + } else { + throw new DataException( + "Unexpected class `" + + value.getClass() + + "` doesn't " + + "support extracting field `" + + field + + "` using a dot."); + } + } + + private static byte[] serializeTopLevelKeyElement(Object keyElement) { + ensureKeyElementIsNotNull(keyElement); + return serializeKeyElement(keyElement); + } + + /** + * Serializes Kafka Connect entry key. + * + *
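A minimal usage sketch of the KeyMapper above, reusing the `username, post_id, time_stamp` example from the `row.key.definition` documentation; the Struct schema is an assumption made only for this example:

import com.google.cloud.kafka.connect.bigtable.mapping.KeyMapper;
import java.nio.charset.StandardCharsets;
import java.util.List;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.data.SchemaBuilder;
import org.apache.kafka.connect.data.Struct;

class KeyMapperSketch {
  public static void main(String[] args) {
    Schema keySchema =
        SchemaBuilder.struct()
            .field("username", Schema.STRING_SCHEMA)
            .field("post_id", Schema.STRING_SCHEMA)
            .field("time_stamp", Schema.STRING_SCHEMA)
            .build();
    Struct kafkaKey =
        new Struct(keySchema)
            .put("username", "bob")
            .put("post_id", "213")
            .put("time_stamp", "123123");
    KeyMapper mapper = new KeyMapper("#", List.of("username", "post_id", "time_stamp"));
    byte[] rowKey = mapper.getKey(kafkaKey);
    // Expected to print bob#213#123123, matching the configuration example.
    System.out.println(new String(rowKey, StandardCharsets.UTF_8));
  }
}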

We implement custom serialization since {@link Object#toString()} mangles arrays. + * + * @param keyElement {@link org.apache.kafka.connect.sink.SinkRecord SinkRecord's} key to be + * serialized. + * @return Serialization of the input value. + */ + private static byte[] serializeKeyElement(Object keyElement) { + if (keyElement == null) { + // Note that it's needed for serializing null-containing Maps and Lists. + return "null".getBytes(StandardCharsets.UTF_8); + } else if (keyElement instanceof byte[]) { + // Note that it breaks compatibility with Confluent's sink. + return (byte[]) keyElement; + } else if (keyElement instanceof ByteBuffer) { + return ((ByteBuffer) keyElement).array(); + } else if (keyElement instanceof List) { + List list = (List) keyElement; + return concatenateByteArrays( + "[", list.stream().map(o -> o.toString().getBytes(StandardCharsets.UTF_8)), ", ", "]"); + } else if (keyElement instanceof Map) { + Map map = (Map) keyElement; + return concatenateByteArrays( + "{", + map.entrySet().stream() + .map( + e -> + concatenateByteArrays( + new byte[0], + Stream.of( + serializeKeyElement(e.getKey()), serializeKeyElement(e.getValue())), + "=".getBytes(StandardCharsets.UTF_8), + new byte[0])), + // Note that Map and Struct have different delimiters for compatibility's sake. + ", ", + "}"); + } else if (keyElement instanceof Struct) { + Struct struct = (Struct) keyElement; + return concatenateByteArrays( + "Struct{", + struct.schema().fields().stream() + .flatMap( + f -> + Optional.ofNullable(struct.get(f)) + .map(v -> new AbstractMap.SimpleImmutableEntry<>(f.name(), v)) + .stream()) + .map( + e -> + concatenateByteArrays( + new byte[0], + Stream.of( + serializeKeyElement(e.getKey()), serializeKeyElement(e.getValue())), + "=".getBytes(StandardCharsets.UTF_8), + new byte[0])), + // Note that Map and Struct have different delimiters for compatibility's sake. + ",", + "}"); + } else { + // TODO: handle logical data types. + return keyElement.toString().getBytes(StandardCharsets.UTF_8); + } + } + + private static void ensureKeyElementIsNotNull(Object value) { + if (value == null) { + // Matching Confluent's sink behavior. 
+ throw new DataException("Error with row key definition: row key fields cannot be null."); + } + } + + private static byte[] concatenateByteArrays( + byte[] start, Stream byteArrays, byte[] delimiter, byte[] end) { + try (ByteArrayOutputStream bos = new ByteArrayOutputStream()) { + bos.write(start); + for (Iterator it = byteArrays.iterator(); it.hasNext(); ) { + byte[] keyPart = it.next(); + bos.write(keyPart); + if (it.hasNext()) { + bos.write(delimiter); + } + } + bos.write(end); + return bos.toByteArray(); + } catch (IOException e) { + throw new DataException("Concatenation of Cloud Bigtable key failed.", e); + } + } + + private static byte[] concatenateByteArrays( + String start, Stream byteArrays, String delimiter, String end) { + return concatenateByteArrays( + start.getBytes(StandardCharsets.UTF_8), + byteArrays, + delimiter.getBytes(StandardCharsets.UTF_8), + end.getBytes(StandardCharsets.UTF_8)); + } +} diff --git a/google-cloud-bigtable-kafka-connect-sink/src/main/java/com/google/cloud/kafka/connect/bigtable/mapping/MutationData.java b/google-cloud-bigtable-kafka-connect-sink/src/main/java/com/google/cloud/kafka/connect/bigtable/mapping/MutationData.java new file mode 100644 index 000000000..65d4fc1c4 --- /dev/null +++ b/google-cloud-bigtable-kafka-connect-sink/src/main/java/com/google/cloud/kafka/connect/bigtable/mapping/MutationData.java @@ -0,0 +1,63 @@ +/* + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.google.cloud.kafka.connect.bigtable.mapping; + +import com.google.cloud.bigtable.data.v2.models.Mutation; +import com.google.cloud.bigtable.data.v2.models.RowMutationEntry; +import com.google.protobuf.ByteString; +import java.util.Set; + +/** + * A class representing single Kafka {@link org.apache.kafka.connect.sink.SinkRecord SinkRecord's} + * output to be written into Cloud Bigtable. 
+ */ +public class MutationData { + private final String targetTable; + private final ByteString rowKey; + private final Mutation mutation; + private final Set requiredColumnFamilies; + + public MutationData( + String targetTable, + ByteString rowKey, + Mutation mutation, + Set requiredColumnFamilies) { + this.targetTable = targetTable; + this.rowKey = rowKey; + this.mutation = mutation; + this.requiredColumnFamilies = requiredColumnFamilies; + } + + public String getTargetTable() { + return targetTable; + } + + public ByteString getRowKey() { + return rowKey; + } + + public RowMutationEntry getUpsertMutation() { + return RowMutationEntry.createFromMutationUnsafe(this.rowKey, this.mutation); + } + + public Mutation getInsertMutation() { + return mutation; + } + + public Set getRequiredColumnFamilies() { + return requiredColumnFamilies; + } +} diff --git a/google-cloud-bigtable-kafka-connect-sink/src/main/java/com/google/cloud/kafka/connect/bigtable/mapping/MutationDataBuilder.java b/google-cloud-bigtable-kafka-connect-sink/src/main/java/com/google/cloud/kafka/connect/bigtable/mapping/MutationDataBuilder.java new file mode 100644 index 000000000..229853dae --- /dev/null +++ b/google-cloud-bigtable-kafka-connect-sink/src/main/java/com/google/cloud/kafka/connect/bigtable/mapping/MutationDataBuilder.java @@ -0,0 +1,81 @@ +/* + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.google.cloud.kafka.connect.bigtable.mapping; + +import com.google.cloud.bigtable.data.v2.models.Mutation; +import com.google.cloud.bigtable.data.v2.models.Range; +import com.google.common.annotations.VisibleForTesting; +import com.google.protobuf.ByteString; +import java.util.HashSet; +import java.util.Optional; +import java.util.Set; + +/** A builder class for {@link MutationData}. */ +public class MutationDataBuilder { + private final Mutation mutation; + private boolean mutationIsEmpty; + private final Set requiredColumnFamilies; + + @VisibleForTesting + MutationDataBuilder(Mutation mutation) { + this.mutation = mutation; + mutationIsEmpty = true; + requiredColumnFamilies = new HashSet<>(); + } + + public MutationDataBuilder() { + this(Mutation.create()); + } + + /** + * Tries to convert this object into {@link MutationData}. + * + * @param targetTable - Cloud Bigtable {@link com.google.cloud.bigtable.admin.v2.models.Table} + * this mutation is to be written to. + * @param rowKey - Cloud Bigtable row key this mutation is to be written to. + * @return {@link Optional#empty()} if this mutation is empty, an {@link Optional} containing this + * mutation ready to be written to Cloud Bigtable otherwise. + */ + public Optional maybeBuild(String targetTable, ByteString rowKey) { + return this.mutationIsEmpty + ? 
Optional.empty() + : Optional.of( + new MutationData(targetTable, rowKey, this.mutation, this.requiredColumnFamilies)); + } + + public void deleteRow() { + mutationIsEmpty = false; + mutation.deleteRow(); + } + + public void deleteFamily(String columnFamily) { + mutationIsEmpty = false; + mutation.deleteFamily(columnFamily); + } + + public void deleteCells( + String columnFamily, ByteString columnQualifier, Range.TimestampRange timestampRange) { + mutationIsEmpty = false; + mutation.deleteCells(columnFamily, columnQualifier, timestampRange); + } + + public void setCell( + String columnFamily, ByteString columnQualifier, long timestampMicros, ByteString value) { + mutationIsEmpty = false; + requiredColumnFamilies.add(columnFamily); + mutation.setCell(columnFamily, columnQualifier, timestampMicros, value); + } +} diff --git a/google-cloud-bigtable-kafka-connect-sink/src/main/java/com/google/cloud/kafka/connect/bigtable/mapping/ValueMapper.java b/google-cloud-bigtable-kafka-connect-sink/src/main/java/com/google/cloud/kafka/connect/bigtable/mapping/ValueMapper.java new file mode 100644 index 000000000..f97fc78f5 --- /dev/null +++ b/google-cloud-bigtable-kafka-connect-sink/src/main/java/com/google/cloud/kafka/connect/bigtable/mapping/ValueMapper.java @@ -0,0 +1,245 @@ +/* + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.google.cloud.kafka.connect.bigtable.mapping; + +import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.SerializerProvider; +import com.fasterxml.jackson.databind.module.SimpleModule; +import com.fasterxml.jackson.databind.ser.std.StdSerializer; +import com.google.cloud.ByteArray; +import com.google.cloud.bigtable.data.v2.models.Range; +import com.google.cloud.kafka.connect.bigtable.config.NullValueMode; +import com.google.common.annotations.VisibleForTesting; +import com.google.protobuf.ByteString; +import java.io.IOException; +import java.math.BigDecimal; +import java.nio.charset.StandardCharsets; +import java.util.AbstractMap; +import java.util.ArrayList; +import java.util.Date; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import javax.annotation.Nonnull; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.kafka.common.utils.Utils; +import org.apache.kafka.connect.data.Field; +import org.apache.kafka.connect.data.Schema; +import org.apache.kafka.connect.data.Struct; +import org.apache.kafka.connect.errors.DataException; + +/** + * A class responsible for converting Kafka {@link org.apache.kafka.connect.sink.SinkRecord + * SinkRecord(s)} into Cloud Bigtable {@link com.google.cloud.bigtable.data.v2.models.Mutation + * Mutation(s)}. 
+ */ +public class ValueMapper { + public final String defaultColumnFamily; + public final ByteString defaultColumnQualifier; + private final NullValueMode nullMode; + private static final ObjectMapper jsonMapper = getJsonMapper(); + + /** + * The main constructor. + * + * @param defaultColumnFamily Default column family as per {@link + * com.google.cloud.kafka.connect.bigtable.config.BigtableSinkConfig#CONFIG_DEFAULT_COLUMN_FAMILY}. + * @param defaultColumnQualifier Default column as per {@link + * com.google.cloud.kafka.connect.bigtable.config.BigtableSinkConfig#CONFIG_ROW_KEY_DELIMITER}. + */ + public ValueMapper( + String defaultColumnFamily, String defaultColumnQualifier, @Nonnull NullValueMode nullMode) { + this.defaultColumnFamily = Utils.isBlank(defaultColumnFamily) ? null : defaultColumnFamily; + this.defaultColumnQualifier = + Utils.isBlank(defaultColumnQualifier) + ? null + : ByteString.copyFrom(defaultColumnQualifier.getBytes(StandardCharsets.UTF_8)); + this.nullMode = nullMode; + } + + /** + * Creates a {@link MutationDataBuilder} that can be used to create a {@link MutationData} + * representing the input Kafka Connect value as Cloud Bigtable mutations that need to be applied. + * + * @param rootKafkaValue The value to be converted into Cloud Bigtable {@link + * com.google.cloud.bigtable.data.v2.models.Mutation Mutation(s)}. + * @param timestampMicros The timestamp the mutations will be created at in microseconds. + */ + public MutationDataBuilder getRecordMutationDataBuilder( + Object rootKafkaValue, long timestampMicros) { + MutationDataBuilder mutationDataBuilder = createMutationDataBuilder(); + if (rootKafkaValue == null && nullMode == NullValueMode.IGNORE) { + // Do nothing + } else if (rootKafkaValue == null && nullMode == NullValueMode.DELETE) { + mutationDataBuilder.deleteRow(); + } else if (rootKafkaValue instanceof Map || rootKafkaValue instanceof Struct) { + for (Map.Entry field : getChildren(rootKafkaValue)) { + String kafkaFieldName = field.getKey().toString(); + Object kafkaFieldValue = field.getValue(); + if (kafkaFieldValue == null && nullMode == NullValueMode.IGNORE) { + continue; + } else if (kafkaFieldValue == null && nullMode == NullValueMode.DELETE) { + mutationDataBuilder.deleteFamily(kafkaFieldName); + } else if (kafkaFieldValue instanceof Map || kafkaFieldValue instanceof Struct) { + for (Map.Entry subfield : getChildren(kafkaFieldValue)) { + ByteString kafkaSubfieldName = + ByteString.copyFrom(subfield.getKey().toString().getBytes(StandardCharsets.UTF_8)); + Object kafkaSubfieldValue = subfield.getValue(); + if (kafkaSubfieldValue == null && nullMode == NullValueMode.IGNORE) { + continue; + } else if (kafkaSubfieldValue == null && nullMode == NullValueMode.DELETE) { + mutationDataBuilder.deleteCells( + kafkaFieldName, + kafkaSubfieldName, + Range.TimestampRange.create(0, timestampMicros)); + } else { + mutationDataBuilder.setCell( + kafkaFieldName, + kafkaSubfieldName, + timestampMicros, + ByteString.copyFrom(serialize(kafkaSubfieldValue))); + } + } + } else { + if (defaultColumnFamily != null) { + mutationDataBuilder.setCell( + defaultColumnFamily, + ByteString.copyFrom(kafkaFieldName.getBytes(StandardCharsets.UTF_8)), + timestampMicros, + ByteString.copyFrom(serialize(kafkaFieldValue))); + } + } + } + } else { + if (defaultColumnFamily != null && defaultColumnQualifier != null) { + mutationDataBuilder.setCell( + defaultColumnFamily, + defaultColumnQualifier, + timestampMicros, + ByteString.copyFrom(serialize(rootKafkaValue))); + } + } + return 
mutationDataBuilder; + } + + @VisibleForTesting + // Method only needed for use in tests. It could be inlined otherwise. + protected MutationDataBuilder createMutationDataBuilder() { + return new MutationDataBuilder(); + } + + /** + * @param mapOrStruct {@link Map} or {@link Struct} whose children we want to list + * @return {@link List} of names or keys of input value's child entries. + */ + private static List> getChildren(Object mapOrStruct) { + if (mapOrStruct instanceof Map) { + @SuppressWarnings("unchecked") + Map kafkaMapValue = (Map) mapOrStruct; + return new ArrayList<>(kafkaMapValue.entrySet()); + } else if (mapOrStruct instanceof Struct) { + Struct kafkaStructValue = (Struct) mapOrStruct; + return kafkaStructValue.schema().fields().stream() + .map( + f -> + new AbstractMap.SimpleImmutableEntry<>( + (Object) f.name(), kafkaStructValue.get(f))) + .collect(Collectors.toList()); + } else { + throw new IllegalStateException(); + } + } + + /** + * @param value Input value. + * @return Input value's serialization's bytes that will be written to Cloud Bigtable as a cell's + * value. + */ + private static byte[] serialize(Object value) { + if (value == null) { + return new byte[0]; + } + if (value instanceof byte[]) { + return (byte[]) value; + } else if (value instanceof ByteArray) { + return serialize(((ByteArray) value).toByteArray()); + } else if (value instanceof Integer) { + return Bytes.toBytes((Integer) value); + } else if (value instanceof Long) { + return Bytes.toBytes((Long) value); + } else if (value instanceof Short) { + return Bytes.toBytes((Short) value); + } else if (value instanceof Byte) { + return Bytes.toBytes((Byte) value); + } else if (value instanceof Float) { + return Bytes.toBytes((Float) value); + } else if (value instanceof Double) { + return Bytes.toBytes((Double) value); + } else if (value instanceof Boolean) { + return Bytes.toBytes((Boolean) value); + } else if (value instanceof String) { + return Bytes.toBytes((String) value); + } else if (value instanceof Character) { + return serialize(Character.toString((Character) value)); + } else if (value instanceof Date) { + // TODO: implement. + throw new DataException("TODO"); + } else if (value instanceof BigDecimal) { + // TODO: implement. + throw new DataException("TODO"); + } else if (value instanceof Map || value instanceof Struct || value instanceof List) { + try { + return jsonMapper.writeValueAsBytes(value); + } catch (JsonProcessingException e) { + throw new DataException("Failed to deserialize a(n) " + value.getClass(), e); + } + } else { + throw new DataException( + "Unsupported serialization of an unexpected class `" + value.getClass() + "`."); + } + } + + /** + * @return {@link ObjectMapper} that can serialize all the Kafka Connect types. 
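To tie the pieces together, here is a sketch of turning a nested value into a MutationData and handing it to the Bigtable client as an upsert. The ValueMapper, MutationData, and MutationDataBuilder calls come from the classes in this change; the BulkMutation wiring, the table name, and the row key are illustrative assumptions:

import com.google.cloud.bigtable.data.v2.BigtableDataClient;
import com.google.cloud.bigtable.data.v2.models.BulkMutation;
import com.google.cloud.kafka.connect.bigtable.config.NullValueMode;
import com.google.cloud.kafka.connect.bigtable.mapping.MutationData;
import com.google.cloud.kafka.connect.bigtable.mapping.ValueMapper;
import com.google.protobuf.ByteString;
import java.util.Map;
import java.util.Optional;

class ValueMapperSketch {
  static void writeExample(BigtableDataClient dataClient) {
    ValueMapper mapper = new ValueMapper("default", "KAFKA_VALUE", NullValueMode.WRITE);
    // Two-level nesting: "stats" becomes the column family, "clicks" the column qualifier.
    Object kafkaValue = Map.of("stats", Map.of("clicks", 42L));
    long timestampMicros = System.currentTimeMillis() * 1000;
    Optional<MutationData> mutationData =
        mapper
            .getRecordMutationDataBuilder(kafkaValue, timestampMicros)
            .maybeBuild("my_table", ByteString.copyFromUtf8("row-1"));
    // Empty when the value produced no mutations (e.g. nulls with value.null.mode=ignore).
    mutationData.ifPresent(
        md ->
            dataClient.bulkMutateRows(
                BulkMutation.create(md.getTargetTable()).add(md.getUpsertMutation())));
  }
}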
+ */ + private static ObjectMapper getJsonMapper() { + ObjectMapper mapper = new ObjectMapper(); + SimpleModule mapperModule = new SimpleModule("KafkaConnectSerializer"); + mapperModule.addSerializer(Struct.class, new StructJsonSerializer(Struct.class)); + mapper.registerModule(mapperModule); + return mapper; + } + + private static class StructJsonSerializer extends StdSerializer { + protected StructJsonSerializer(Class t) { + super(t); + } + + @Override + public void serialize(Struct value, JsonGenerator gen, SerializerProvider provider) + throws IOException { + Schema schema = value.schema(); + gen.writeStartObject(); + for (Field field : schema.fields()) { + String fieldName = field.name(); + gen.writeObjectField(fieldName, value.getWithoutDefault(fieldName)); + } + gen.writeEndObject(); + } + } +} diff --git a/google-cloud-bigtable-kafka-connect-sink/src/main/java/com/google/cloud/kafka/connect/bigtable/version/PackageMetadata.java b/google-cloud-bigtable-kafka-connect-sink/src/main/java/com/google/cloud/kafka/connect/bigtable/version/PackageMetadata.java new file mode 100644 index 000000000..0954d8d01 --- /dev/null +++ b/google-cloud-bigtable-kafka-connect-sink/src/main/java/com/google/cloud/kafka/connect/bigtable/version/PackageMetadata.java @@ -0,0 +1,39 @@ +/* + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.google.cloud.kafka.connect.bigtable.version; + +import java.util.Optional; + +/** A class responsible for extracting maven-generated package metadata. */ +public class PackageMetadata { + public static String UNKNOWN_VERSION = "unknown"; + + /** + * Extracts version information from the package metadata. + * + * @return String representation of the version of the package. Is equal to {@link + * PackageMetadata#UNKNOWN_VERSION} when the information is missing from package metadata. + */ + public static String getVersion() { + Optional discoveredVersion = Optional.empty(); + try { + discoveredVersion = + Optional.ofNullable(PackageMetadata.class.getPackage().getImplementationVersion()); + } catch (NullPointerException ignored) { + } + return discoveredVersion.orElse(UNKNOWN_VERSION); + } +} diff --git a/google-cloud-bigtable-kafka-connect-sink/src/test/java/com/google/cloud/kafka/connect/bigtable/BigtableSinkConnectorTest.java b/google-cloud-bigtable-kafka-connect-sink/src/test/java/com/google/cloud/kafka/connect/bigtable/BigtableSinkConnectorTest.java new file mode 100644 index 000000000..6b3141d80 --- /dev/null +++ b/google-cloud-bigtable-kafka-connect-sink/src/test/java/com/google/cloud/kafka/connect/bigtable/BigtableSinkConnectorTest.java @@ -0,0 +1,84 @@ +/* + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.google.cloud.kafka.connect.bigtable; + +import static com.google.cloud.kafka.connect.bigtable.config.BigtableSinkTaskConfig.CONFIG_TASK_ID; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; + +import com.google.cloud.kafka.connect.bigtable.util.BasicPropertiesFactory; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +@RunWith(JUnit4.class) +public class BigtableSinkConnectorTest { + BigtableSinkConnector connector; + + @Before + public void setUp() { + connector = new BigtableSinkConnector(); + } + + @Test + public void testConfig() { + assertNotNull(connector.config()); + } + + @Test + public void testValidate() { + connector.validate(BasicPropertiesFactory.getSinkProps()); + } + + @Test + public void testStart() { + connector.start(BasicPropertiesFactory.getSinkProps()); + } + + @Test + public void testStop() { + connector.stop(); + } + + @Test + public void testTaskClass() { + assertEquals(BigtableSinkTask.class, connector.taskClass()); + } + + @Test + public void testTaskConfigs() { + Map connectorConfig = BasicPropertiesFactory.getSinkProps(); + connector.start(new HashMap<>(connectorConfig)); + int maxTasks = 1000; + List> taskConfigs = connector.taskConfigs(maxTasks); + assertEquals(maxTasks, taskConfigs.size()); + for (Integer i = 0; i < maxTasks; i++) { + Map taskConfig = taskConfigs.get(i); + assertEquals(i.toString(), taskConfig.get(CONFIG_TASK_ID)); + taskConfig.remove(CONFIG_TASK_ID); + assertEquals(connectorConfig, taskConfig); + } + } + + @Test + public void testVersion() { + assertNotNull(connector.version()); + } +} diff --git a/google-cloud-bigtable-kafka-connect-sink/src/test/java/com/google/cloud/kafka/connect/bigtable/BigtableSinkTaskTest.java b/google-cloud-bigtable-kafka-connect-sink/src/test/java/com/google/cloud/kafka/connect/bigtable/BigtableSinkTaskTest.java new file mode 100644 index 000000000..3348d4bf6 --- /dev/null +++ b/google-cloud-bigtable-kafka-connect-sink/src/test/java/com/google/cloud/kafka/connect/bigtable/BigtableSinkTaskTest.java @@ -0,0 +1,612 @@ +/* + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.google.cloud.kafka.connect.bigtable; + +import static com.google.cloud.kafka.connect.bigtable.config.BigtableSinkConfig.CONFIG_AUTO_CREATE_COLUMN_FAMILIES; +import static com.google.cloud.kafka.connect.bigtable.config.BigtableSinkConfig.CONFIG_AUTO_CREATE_TABLES; +import static com.google.cloud.kafka.connect.bigtable.config.BigtableSinkConfig.CONFIG_ERROR_MODE; +import static com.google.cloud.kafka.connect.bigtable.config.BigtableSinkConfig.CONFIG_INSERT_MODE; +import static com.google.cloud.kafka.connect.bigtable.config.BigtableSinkConfig.CONFIG_TABLE_NAME_FORMAT; +import static com.google.cloud.kafka.connect.bigtable.util.FutureUtil.completedApiFuture; +import static com.google.cloud.kafka.connect.bigtable.util.MockUtil.assertTotalNumberOfInvocations; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertThrows; +import static org.junit.Assert.assertTrue; +import static org.mockito.Mockito.any; +import static org.mockito.Mockito.anyLong; +import static org.mockito.Mockito.anyString; +import static org.mockito.Mockito.argThat; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.doThrow; +import static org.mockito.Mockito.eq; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.reset; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.verifyNoMoreInteractions; +import static org.mockito.MockitoAnnotations.openMocks; + +import com.google.api.gax.batching.Batcher; +import com.google.api.gax.rpc.ApiException; +import com.google.bigtable.admin.v2.Table; +import com.google.cloud.bigtable.admin.v2.BigtableTableAdminClient; +import com.google.cloud.bigtable.data.v2.BigtableDataClient; +import com.google.cloud.bigtable.data.v2.models.Mutation; +import com.google.cloud.bigtable.data.v2.models.RowMutationEntry; +import com.google.cloud.kafka.connect.bigtable.autocreate.BigtableSchemaManager; +import com.google.cloud.kafka.connect.bigtable.autocreate.ResourceCreationResult; +import com.google.cloud.kafka.connect.bigtable.config.BigtableErrorMode; +import com.google.cloud.kafka.connect.bigtable.config.BigtableSinkTaskConfig; +import com.google.cloud.kafka.connect.bigtable.config.InsertMode; +import com.google.cloud.kafka.connect.bigtable.exception.InvalidBigtableSchemaModificationException; +import com.google.cloud.kafka.connect.bigtable.mapping.KeyMapper; +import com.google.cloud.kafka.connect.bigtable.mapping.MutationData; +import com.google.cloud.kafka.connect.bigtable.mapping.MutationDataBuilder; +import com.google.cloud.kafka.connect.bigtable.mapping.ValueMapper; +import com.google.cloud.kafka.connect.bigtable.util.ApiExceptionFactory; +import com.google.cloud.kafka.connect.bigtable.util.BasicPropertiesFactory; +import com.google.cloud.kafka.connect.bigtable.util.FutureUtil; +import com.google.protobuf.ByteString; +import java.nio.charset.StandardCharsets; +import java.util.AbstractMap; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.Future; 
+import java.util.stream.Collectors; +import java.util.stream.IntStream; +import org.apache.kafka.common.record.TimestampType; +import org.apache.kafka.connect.errors.ConnectException; +import org.apache.kafka.connect.sink.ErrantRecordReporter; +import org.apache.kafka.connect.sink.SinkRecord; +import org.apache.kafka.connect.sink.SinkTaskContext; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; +import org.mockito.Mock; +import org.slf4j.Logger; + +@RunWith(JUnit4.class) +public class BigtableSinkTaskTest { + TestBigtableSinkTask task; + BigtableSinkTaskConfig config; + @Mock BigtableDataClient bigtableData; + @Mock BigtableTableAdminClient bigtableAdmin; + @Mock KeyMapper keyMapper; + @Mock ValueMapper valueMapper; + @Mock BigtableSchemaManager schemaManager; + @Mock SinkTaskContext context; + @Mock ErrantRecordReporter errorReporter; + + @Before + public void setUp() { + openMocks(this); + config = new BigtableSinkTaskConfig(BasicPropertiesFactory.getTaskProps()); + } + + @Test + public void testStart() { + task = spy(new TestBigtableSinkTask(null, null, null, null, null, null, null)); + task.start(BasicPropertiesFactory.getTaskProps()); + } + + @Test + public void testStop() throws InterruptedException { + for (List test : + List.of( + List.of(false, false), + List.of(false, true), + List.of(true, false), + List.of(true, true))) { + assertEquals(2, test.size()); + boolean adminIsNotNull = test.get(0); + boolean dataIsNotNull = test.get(1); + int expectedAdminCloseCallCount = adminIsNotNull ? 1 : 0; + int expectedDataCloseCallCount = dataIsNotNull ? 1 : 0; + + BigtableTableAdminClient maybeAdmin = adminIsNotNull ? bigtableAdmin : null; + BigtableDataClient maybeData = dataIsNotNull ? 
bigtableData : null; + task = new TestBigtableSinkTask(null, maybeData, maybeAdmin, null, null, null, null); + Batcher batcher = mock(Batcher.class); + doReturn(completedApiFuture(null)).when(batcher).closeAsync(); + task.getBatchers().put("batcherTable", batcher); + + doThrow(new RuntimeException()).when(bigtableAdmin).close(); + doThrow(new RuntimeException()).when(bigtableData).close(); + + assertFalse(task.getBatchers().isEmpty()); + task.stop(); + assertTrue(task.getBatchers().isEmpty()); + verify(bigtableAdmin, times(expectedAdminCloseCallCount)).close(); + verify(bigtableData, times(expectedDataCloseCallCount)).close(); + verify(batcher, times(1)).closeAsync(); + + reset(bigtableAdmin); + reset(bigtableData); + } + } + + @Test + public void testVersion() { + task = spy(new TestBigtableSinkTask(null, null, null, null, null, null, null)); + assertNotNull(task.version()); + } + + @Test + public void testGetTableName() { + SinkRecord record = new SinkRecord("topic${test}", 1, null, null, null, null, 1); + for (Map.Entry test : + List.of( + new AbstractMap.SimpleImmutableEntry<>( + "prefix_${topic}_suffix", "prefix_topic${test}_suffix"), + new AbstractMap.SimpleImmutableEntry<>( + "prefix_${topic_suffix", "prefix_${topic_suffix"), + new AbstractMap.SimpleImmutableEntry<>("prefix_$topic_suffix", "prefix_$topic_suffix"), + new AbstractMap.SimpleImmutableEntry<>("prefix_${bad}_suffix", "prefix_${bad}_suffix"), + new AbstractMap.SimpleImmutableEntry<>("noSubstitution", "noSubstitution"))) { + Map props = BasicPropertiesFactory.getTaskProps(); + props.put(CONFIG_TABLE_NAME_FORMAT, test.getKey()); + task = + new TestBigtableSinkTask( + new BigtableSinkTaskConfig(props), null, null, null, null, null, null); + assertEquals(test.getValue(), task.getTableName(record)); + } + } + + @Test + public void testCreateRecordMutationDataEmptyKey() { + task = new TestBigtableSinkTask(config, null, null, keyMapper, null, null, null); + doReturn(new byte[0]).when(keyMapper).getKey(any()); + SinkRecord record = new SinkRecord("topic", 1, null, new Object(), null, null, 1); + assertThrows(ConnectException.class, () -> task.createRecordMutationData(record)); + } + + @Test + public void testCreateRecordMutationDataNonemptyKey() { + SinkRecord record = new SinkRecord("topic", 1, null, new Object(), null, null, 1); + task = new TestBigtableSinkTask(config, null, null, keyMapper, valueMapper, null, null); + + byte[] rowKey = "rowKey".getBytes(StandardCharsets.UTF_8); + doReturn(rowKey).when(keyMapper).getKey(any()); + doAnswer( + i -> { + MutationDataBuilder builder = new MutationDataBuilder(); + return builder; + }) + .when(valueMapper) + .getRecordMutationDataBuilder(any(), anyLong()); + assertTrue(task.createRecordMutationData(record).isEmpty()); + + doAnswer( + i -> { + MutationDataBuilder builder = new MutationDataBuilder(); + builder.deleteRow(); + return builder; + }) + .when(valueMapper) + .getRecordMutationDataBuilder(any(), anyLong()); + assertTrue(task.createRecordMutationData(record).isPresent()); + } + + @Test + public void testErrorReporterWithDLQ() { + doReturn(errorReporter).when(context).errantRecordReporter(); + task = new TestBigtableSinkTask(null, null, null, null, null, null, context); + SinkRecord record = new SinkRecord(null, 1, null, null, null, null, 1); + Throwable t = new Exception("testErrorReporterWithDLQ"); + verifyNoMoreInteractions(task.getLogger()); + task.reportError(record, t); + verify(errorReporter, times(1)).report(record, t); + } + + @Test + public void 
testErrorReporterNoDLQIgnoreMode() { + Map props = BasicPropertiesFactory.getTaskProps(); + props.put(CONFIG_ERROR_MODE, BigtableErrorMode.IGNORE.name()); + BigtableSinkTaskConfig config = new BigtableSinkTaskConfig(props); + + doThrow(new NoSuchMethodError()).when(context).errantRecordReporter(); + task = new TestBigtableSinkTask(config, null, null, null, null, null, context); + SinkRecord record = new SinkRecord(null, 1, null, null, null, null, 1); + verifyNoMoreInteractions(task.getLogger()); + verifyNoMoreInteractions(errorReporter); + task.reportError(record, new Exception("testErrorReporterWithDLQ")); + } + + @Test + public void testErrorReporterNoDLQWarnMode() { + Map props = BasicPropertiesFactory.getTaskProps(); + props.put(CONFIG_ERROR_MODE, BigtableErrorMode.WARN.name()); + BigtableSinkTaskConfig config = new BigtableSinkTaskConfig(props); + + doReturn(null).when(context).errantRecordReporter(); + task = new TestBigtableSinkTask(config, null, null, null, null, null, context); + SinkRecord record = new SinkRecord(null, 1, null, "key", null, null, 1); + Throwable t = new Exception("testErrorReporterNoDLQWarnMode"); + verifyNoMoreInteractions(errorReporter); + task.reportError(record, t); + verify(task.getLogger(), times(1)).warn(anyString(), eq(record.key()), eq(t)); + } + + @Test + public void testErrorReporterNoDLQFailMode() { + Map props = BasicPropertiesFactory.getTaskProps(); + props.put(CONFIG_ERROR_MODE, BigtableErrorMode.FAIL.name()); + BigtableSinkTaskConfig config = new BigtableSinkTaskConfig(props); + + doReturn(null).when(context).errantRecordReporter(); + task = new TestBigtableSinkTask(config, null, null, null, null, null, context); + SinkRecord record = new SinkRecord(null, 1, null, "key", null, null, 1); + Throwable t = new Exception("testErrorReporterNoDLQFailMode"); + verifyNoMoreInteractions(errorReporter); + verifyNoMoreInteractions(task.getLogger()); + assertThrows(ConnectException.class, () -> task.reportError(record, t)); + } + + @Test + public void testGetTimestamp() { + task = new TestBigtableSinkTask(null, null, null, null, null, null, null); + long timestampMillis = 123L; + SinkRecord recordWithTimestamp = + new SinkRecord( + null, 1, null, null, null, null, 1, timestampMillis, TimestampType.CREATE_TIME); + SinkRecord recordWithNullTimestamp = new SinkRecord(null, 1, null, null, null, null, 2); + + assertEquals( + (Long) (1000L * timestampMillis), (Long) task.getTimestampMicros(recordWithTimestamp)); + assertNotNull(task.getTimestampMicros(recordWithNullTimestamp)); + + // Assertion that the Java Bigtable client doesn't support microsecond timestamp granularity. + // When it starts supporting it, getTimestamp() will need to get modified. 
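+    // The assertion below pins the granularities currently exposed by the Admin API; should a
+    // microsecond granularity ever be added to the enum, the millisecond-based conversion
+    // asserted above (123 ms -> 123_000 µs) would need revisiting.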
+ assertEquals( + Arrays.stream(Table.TimestampGranularity.values()).collect(Collectors.toSet()), + Set.of( + Table.TimestampGranularity.TIMESTAMP_GRANULARITY_UNSPECIFIED, + Table.TimestampGranularity.MILLIS, + Table.TimestampGranularity.UNRECOGNIZED)); + } + + @Test + public void testHandleResults() { + SinkRecord errorSinkRecord = new SinkRecord("", 1, null, null, null, null, 1); + SinkRecord successSinkRecord = new SinkRecord("", 1, null, null, null, null, 2); + Map> perRecordResults = + Map.of( + errorSinkRecord, CompletableFuture.failedFuture(new Exception("testHandleResults")), + successSinkRecord, CompletableFuture.completedFuture(null)); + doReturn(errorReporter).when(context).errantRecordReporter(); + task = new TestBigtableSinkTask(null, null, null, null, null, null, context); + task.handleResults(perRecordResults); + verify(errorReporter, times(1)).report(eq(errorSinkRecord), any()); + assertTotalNumberOfInvocations(errorReporter, 1); + } + + @Test + public void testPrepareRecords() { + task = spy(new TestBigtableSinkTask(null, null, null, null, null, null, context)); + doReturn(errorReporter).when(context).errantRecordReporter(); + + MutationData okMutationData = mock(MutationData.class); + Exception exception = new RuntimeException(); + doThrow(exception) + .doReturn(Optional.empty()) + .doReturn(Optional.of(okMutationData)) + .when(task) + .createRecordMutationData(any()); + + SinkRecord exceptionRecord = new SinkRecord("", 1, null, null, null, null, 1); + SinkRecord emptyRecord = new SinkRecord("", 1, null, null, null, null, 3); + SinkRecord okRecord = new SinkRecord("", 1, null, null, null, null, 2); + + Map result = + task.prepareRecords(List.of(exceptionRecord, emptyRecord, okRecord)); + assertEquals(Map.of(okRecord, okMutationData), result); + verify(errorReporter, times(1)).report(exceptionRecord, exception); + assertTotalNumberOfInvocations(errorReporter, 1); + } + + @Test + public void testAutoCreateTablesAndHandleErrors() { + task = spy(new TestBigtableSinkTask(null, null, null, null, null, schemaManager, context)); + doReturn(errorReporter).when(context).errantRecordReporter(); + + doReturn(errorReporter).when(context).errantRecordReporter(); + SinkRecord okRecord = new SinkRecord("", 1, null, null, null, null, 1); + SinkRecord bigtableErrorRecord = new SinkRecord("", 1, null, null, null, null, 2); + SinkRecord dataErrorRecord = new SinkRecord("", 1, null, null, null, null, 3); + MutationData okMutationData = mock(MutationData.class); + MutationData bigtableErrorMutationData = mock(MutationData.class); + MutationData dataErrorMutationData = mock(MutationData.class); + + Map mutations = new HashMap<>(); + mutations.put(okRecord, okMutationData); + mutations.put(bigtableErrorRecord, bigtableErrorMutationData); + mutations.put(dataErrorRecord, dataErrorMutationData); + + ResourceCreationResult resourceCreationResult = + new ResourceCreationResult(Set.of(bigtableErrorRecord), Set.of(dataErrorRecord)); + doReturn(resourceCreationResult).when(schemaManager).ensureTablesExist(any()); + Map mutationsToApply = + task.autoCreateTablesAndHandleErrors(mutations); + + assertEquals(Map.of(okRecord, okMutationData), mutationsToApply); + verify(errorReporter, times(1)) + .report(eq(bigtableErrorRecord), argThat(e -> e instanceof ConnectException)); + verify(errorReporter, times(1)) + .report( + eq(dataErrorRecord), + argThat(e -> e instanceof InvalidBigtableSchemaModificationException)); + assertTotalNumberOfInvocations(errorReporter, 2); + } + + @Test + public void 
testAutoCreateColumnFamiliesAndHandleErrors() { + task = spy(new TestBigtableSinkTask(null, null, null, null, null, schemaManager, context)); + doReturn(errorReporter).when(context).errantRecordReporter(); + + doReturn(errorReporter).when(context).errantRecordReporter(); + SinkRecord okRecord = new SinkRecord("", 1, null, null, null, null, 1); + SinkRecord bigtableErrorRecord = new SinkRecord("", 1, null, null, null, null, 2); + SinkRecord dataErrorRecord = new SinkRecord("", 1, null, null, null, null, 3); + MutationData okMutationData = mock(MutationData.class); + MutationData bigtableErrorMutationData = mock(MutationData.class); + MutationData dataErrorMutationData = mock(MutationData.class); + + Map mutations = new HashMap<>(); + mutations.put(okRecord, okMutationData); + mutations.put(bigtableErrorRecord, bigtableErrorMutationData); + mutations.put(dataErrorRecord, dataErrorMutationData); + + ResourceCreationResult resourceCreationResult = + new ResourceCreationResult(Set.of(bigtableErrorRecord), Set.of(dataErrorRecord)); + doReturn(resourceCreationResult).when(schemaManager).ensureColumnFamiliesExist(any()); + Map mutationsToApply = + task.autoCreateColumnFamiliesAndHandleErrors(mutations); + + assertEquals(Map.of(okRecord, okMutationData), mutationsToApply); + verify(errorReporter, times(1)) + .report(eq(bigtableErrorRecord), argThat(e -> e instanceof ConnectException)); + verify(errorReporter, times(1)) + .report( + eq(dataErrorRecord), + argThat(e -> e instanceof InvalidBigtableSchemaModificationException)); + assertTotalNumberOfInvocations(errorReporter, 2); + } + + @Test + public void testInsertRows() throws ExecutionException, InterruptedException { + task = new TestBigtableSinkTask(null, bigtableData, null, null, null, null, null); + ApiException exception = ApiExceptionFactory.create(); + doReturn(false).doReturn(true).doThrow(exception).when(bigtableData).checkAndMutateRow(any()); + + SinkRecord successRecord = new SinkRecord("", 1, null, null, null, null, 1); + SinkRecord errorRecord = new SinkRecord("", 1, null, null, null, null, 2); + SinkRecord exceptionRecord = new SinkRecord("", 1, null, null, null, null, 3); + MutationData commonMutationData = mock(MutationData.class); + doReturn("ignored").when(commonMutationData).getTargetTable(); + doReturn(ByteString.copyFrom("ignored".getBytes(StandardCharsets.UTF_8))) + .when(commonMutationData) + .getRowKey(); + doReturn(mock(Mutation.class)).when(commonMutationData).getInsertMutation(); + + // LinkedHashMap, because we mock consecutive return values of Bigtable client mock and thus + // rely on the order. 
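+    // The three stubbed results exercise insert-mode handling: a checkAndMutateRow() result of
+    // false is treated as success (presumably meaning the row did not yet exist, so the insert
+    // was applied), true is treated as a failure (the row key was already present), and a thrown
+    // ApiException also fails the record.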
+ Map input = new LinkedHashMap<>(); + input.put(successRecord, commonMutationData); + input.put(errorRecord, commonMutationData); + input.put(exceptionRecord, commonMutationData); + Map> output = new HashMap<>(); + task.insertRows(input, output); + + assertEquals(input.keySet(), output.keySet()); + verify(bigtableData, times(input.size())).checkAndMutateRow(any()); + assertTotalNumberOfInvocations(bigtableData, input.size()); + + output.get(successRecord).get(); + assertThrows(ExecutionException.class, () -> output.get(errorRecord).get()); + assertThrows(ExecutionException.class, () -> output.get(exceptionRecord).get()); + } + + @Test + public void testUpsertRows() { + Map props = BasicPropertiesFactory.getTaskProps(); + int maxBatchSize = 3; + int totalRecords = 1000; + props.put(BigtableSinkTaskConfig.CONFIG_MAX_BATCH_SIZE, Integer.toString(maxBatchSize)); + BigtableSinkTaskConfig config = new BigtableSinkTaskConfig(props); + + task = spy(new TestBigtableSinkTask(config, null, null, null, null, null, null)); + String batcherTable = "batcherTable"; + Batcher batcher = mock(Batcher.class); + doAnswer( + invocation -> { + TestBigtableSinkTask task = (TestBigtableSinkTask) invocation.getMock(); + task.getBatchers().computeIfAbsent(batcherTable, ignored -> batcher); + return null; + }) + .when(task) + .performUpsertBatch(any(), any()); + + MutationData commonMutationData = mock(MutationData.class); + + Map input = + IntStream.range(0, totalRecords) + .mapToObj(i -> new SinkRecord("", 1, null, null, null, null, i)) + .collect(Collectors.toMap(i -> i, ignored -> commonMutationData)); + + Map> fakeMutationData = mock(Map.class); + assertTrue(task.getBatchers().isEmpty()); + task.upsertRows(input, fakeMutationData); + assertEquals(Set.of(batcher), task.getBatchers().values().stream().collect(Collectors.toSet())); + + int expectedFullBatches = totalRecords / maxBatchSize; + int expectedPartialBatches = totalRecords % maxBatchSize == 0 ? 
0 : 1; + + verify(task, times(expectedFullBatches)) + .performUpsertBatch(argThat(v -> v.size() == maxBatchSize), any()); + verify(task, times(expectedPartialBatches)) + .performUpsertBatch(argThat(v -> v.size() != maxBatchSize), any()); + } + + @Test + public void testPerformUpsertBatch() throws ExecutionException, InterruptedException { + String okTable = "okTable"; + String errorTable = "errorTable"; + + Batcher okBatcher = mock(Batcher.class); + doReturn(completedApiFuture(null)).when(okBatcher).add(any()); + Batcher errorBatcher = mock(Batcher.class); + doReturn(FutureUtil.failedApiFuture(new Exception())).when(errorBatcher).add(any()); + + doReturn(okBatcher).when(bigtableData).newBulkMutationBatcher(okTable); + doReturn(errorBatcher).when(bigtableData).newBulkMutationBatcher(errorTable); + task = new TestBigtableSinkTask(null, bigtableData, null, null, null, null, null); + + SinkRecord okRecord = new SinkRecord(okTable, 1, null, null, null, null, 1); + SinkRecord errorRecord = new SinkRecord(errorTable, 1, null, null, null, null, 2); + + MutationData okMutationData = mock(MutationData.class); + doReturn(okTable).when(okMutationData).getTargetTable(); + doReturn(mock(RowMutationEntry.class)).when(okMutationData).getUpsertMutation(); + MutationData errorMutationData = mock(MutationData.class); + doReturn(errorTable).when(errorMutationData).getTargetTable(); + doReturn(mock(RowMutationEntry.class)).when(errorMutationData).getUpsertMutation(); + + Map input = + Map.of( + okRecord, okMutationData, + errorRecord, errorMutationData); + Map> output = new HashMap<>(); + + assertTrue(task.getBatchers().isEmpty()); + task.performUpsertBatch(new ArrayList<>(input.entrySet()), output); + assertEquals( + Set.of(okBatcher, errorBatcher), + task.getBatchers().values().stream().collect(Collectors.toSet())); + + assertEquals(input.keySet(), output.keySet()); + verify(okBatcher, times(1)).add(any()); + verify(okBatcher, times(1)).sendOutstanding(); + assertTotalNumberOfInvocations(okBatcher, 2); + verify(errorBatcher, times(1)).add(any()); + verify(errorBatcher, times(1)).sendOutstanding(); + assertTotalNumberOfInvocations(errorBatcher, 2); + + output.get(okRecord).get(); + assertThrows(ExecutionException.class, () -> output.get(errorRecord).get()); + } + + @Test + public void testPutBranches() { + SinkRecord record1 = new SinkRecord("table1", 1, null, null, null, null, 1); + SinkRecord record2 = new SinkRecord("table2", 1, null, null, null, null, 2); + + for (List test : + List.of( + List.of(false, false, false), + List.of(false, false, true), + List.of(false, true, false), + List.of(false, true, true), + List.of(true, false, false), + List.of(true, false, true), + List.of(true, true, false), + List.of(true, true, true))) { + boolean autoCreateTables = test.get(0); + boolean autoCreateColumnFamilies = test.get(1); + boolean useInsertMode = test.get(2); + + Map props = BasicPropertiesFactory.getTaskProps(); + props.put(CONFIG_AUTO_CREATE_TABLES, Boolean.toString(autoCreateTables)); + props.put(CONFIG_AUTO_CREATE_COLUMN_FAMILIES, Boolean.toString(autoCreateColumnFamilies)); + props.put(CONFIG_INSERT_MODE, (useInsertMode ? 
InsertMode.INSERT : InsertMode.UPSERT).name()); + config = new BigtableSinkTaskConfig(props); + + byte[] rowKey = "rowKey".getBytes(StandardCharsets.UTF_8); + doReturn(rowKey).when(keyMapper).getKey(any()); + doAnswer( + i -> { + MutationDataBuilder builder = new MutationDataBuilder(); + builder.deleteRow(); + return builder; + }) + .when(valueMapper) + .getRecordMutationDataBuilder(any(), anyLong()); + + Batcher batcher = mock(Batcher.class); + doReturn(completedApiFuture(null)).when(batcher).add(any()); + doReturn(batcher).when(bigtableData).newBulkMutationBatcher(anyString()); + doReturn(new ResourceCreationResult(Collections.emptySet(), Collections.emptySet())) + .when(schemaManager) + .ensureTablesExist(any()); + doReturn(new ResourceCreationResult(Collections.emptySet(), Collections.emptySet())) + .when(schemaManager) + .ensureColumnFamiliesExist(any()); + + task = + spy( + new TestBigtableSinkTask( + config, bigtableData, null, keyMapper, valueMapper, schemaManager, null)); + + task.put(List.of(record1, record2)); + + verify(task, times(1)).prepareRecords(any()); + verify(schemaManager, times(autoCreateTables ? 1 : 0)).ensureTablesExist(any()); + verify(schemaManager, times(autoCreateColumnFamilies ? 1 : 0)) + .ensureColumnFamiliesExist(any()); + verify(task, times(useInsertMode ? 1 : 0)).insertRows(any(), any()); + verify(task, times(useInsertMode ? 0 : 1)).upsertRows(any(), any()); + verify(task, times(1)).handleResults(any()); + + reset(task); + reset(schemaManager); + } + } + + private static class TestBigtableSinkTask extends BigtableSinkTask { + public TestBigtableSinkTask( + BigtableSinkTaskConfig config, + BigtableDataClient bigtableData, + BigtableTableAdminClient bigtableAdmin, + KeyMapper keyMapper, + ValueMapper valueMapper, + BigtableSchemaManager schemaManager, + SinkTaskContext context) { + super(config, bigtableData, bigtableAdmin, keyMapper, valueMapper, schemaManager, context); + this.logger = mock(Logger.class); + } + + public Logger getLogger() { + return logger; + } + + public Map> getBatchers() { + return batchers; + } + } +} diff --git a/google-cloud-bigtable-kafka-connect-sink/src/test/java/com/google/cloud/kafka/connect/bigtable/autocreate/BigtableSchemaManagerTest.java b/google-cloud-bigtable-kafka-connect-sink/src/test/java/com/google/cloud/kafka/connect/bigtable/autocreate/BigtableSchemaManagerTest.java new file mode 100644 index 000000000..c8b6eaf45 --- /dev/null +++ b/google-cloud-bigtable-kafka-connect-sink/src/test/java/com/google/cloud/kafka/connect/bigtable/autocreate/BigtableSchemaManagerTest.java @@ -0,0 +1,749 @@ +/* + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.google.cloud.kafka.connect.bigtable.autocreate; + +import static com.google.cloud.kafka.connect.bigtable.util.FutureUtil.completedApiFuture; +import static com.google.cloud.kafka.connect.bigtable.util.FutureUtil.failedApiFuture; +import static com.google.cloud.kafka.connect.bigtable.util.MockUtil.assertTotalNumberOfInvocations; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertThrows; +import static org.junit.Assert.assertTrue; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.argThat; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.doThrow; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.reset; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.verifyNoInteractions; + +import com.google.api.core.ApiFuture; +import com.google.cloud.bigtable.admin.v2.BigtableTableAdminClient; +import com.google.cloud.bigtable.admin.v2.models.ColumnFamily; +import com.google.cloud.bigtable.admin.v2.models.CreateTableRequest; +import com.google.cloud.bigtable.admin.v2.models.ModifyColumnFamiliesRequest; +import com.google.cloud.bigtable.admin.v2.models.Table; +import com.google.cloud.kafka.connect.bigtable.autocreate.BigtableSchemaManager.ResourceAndRecords; +import com.google.cloud.kafka.connect.bigtable.mapping.MutationData; +import com.google.cloud.kafka.connect.bigtable.util.ApiExceptionFactory; +import io.grpc.Status; +import java.util.AbstractMap; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.stream.Collectors; +import org.apache.kafka.connect.errors.ConnectException; +import org.apache.kafka.connect.sink.SinkRecord; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +@RunWith(JUnit4.class) +public class BigtableSchemaManagerTest { + BigtableTableAdminClient bigtable; + TestBigtableSchemaManager bigtableSchemaManager; + + @Before + public void setUp() { + bigtable = mock(BigtableTableAdminClient.class); + bigtableSchemaManager = spy(new TestBigtableSchemaManager(bigtable)); + } + + @Test + public void testTableCachePopulationSuccess() { + List tables = List.of("table1", "table2"); + doReturn(tables).when(bigtable).listTables(); + bigtableSchemaManager.refreshTableNamesCache(); + assertEquals(new HashSet<>(tables), bigtableSchemaManager.getCache().keySet()); + assertTotalNumberOfInvocations(bigtable, 1); + + reset(bigtable); + verifyNoInteractions(bigtable); + Map input = + generateInput( + tables.stream() + .map(l -> new AbstractMap.SimpleImmutableEntry<>(l, Set.of("cf"))) + .collect(Collectors.toList())); + ResourceCreationResult result = bigtableSchemaManager.ensureTablesExist(input); + assertTrue(result.getBigtableErrors().isEmpty()); + assertTrue(result.getDataErrors().isEmpty()); + } + + @Test + public void testTableCachePopulationMayRemoveElements() { + List tables1 = List.of("table1", "table2"); + List tables2 = List.of(tables1.get(0)); + + doReturn(tables1).when(bigtable).listTables(); + bigtableSchemaManager.refreshTableNamesCache(); + assertEquals(new 
HashSet<>(tables1), bigtableSchemaManager.getCache().keySet()); + reset(bigtable); + + doReturn(tables2).when(bigtable).listTables(); + bigtableSchemaManager.refreshTableNamesCache(); + assertEquals(new HashSet<>(tables2), bigtableSchemaManager.getCache().keySet()); + verify(bigtable, times(1)).listTables(); + assertTotalNumberOfInvocations(bigtable, 1); + } + + @Test + public void testTableCachePopulationError() { + doThrow(ApiExceptionFactory.create()).when(bigtable).listTables(); + assertThrows(ConnectException.class, () -> bigtableSchemaManager.refreshTableNamesCache()); + } + + @Test + public void testTableColumnFamiliesCachePopulationSuccess() { + Map> tablesAndColumnFamilies = + Map.of( + "table1", Set.of("cf1", "cf2"), + "table2", Set.of("cf3", "cf4")); + doReturn(new ArrayList<>(tablesAndColumnFamilies.keySet())).when(bigtable).listTables(); + for (Map.Entry> entry : tablesAndColumnFamilies.entrySet()) { + mockGetTableSuccess(bigtable, entry.getKey(), entry.getValue()); + } + + Set refreshedTables = tablesAndColumnFamilies.keySet(); + bigtableSchemaManager.refreshTableColumnFamiliesCache(refreshedTables); + verify(bigtableSchemaManager, times(1)).refreshTableColumnFamiliesCache(refreshedTables); + verify(bigtableSchemaManager, times(1)).refreshTableNamesCache(); + assertTotalNumberOfInvocations(bigtableSchemaManager, 2); + assertEquals( + bigtableSchemaManager.getCache(), + tablesAndColumnFamilies.entrySet().stream() + .map(e -> new AbstractMap.SimpleImmutableEntry<>(e.getKey(), Optional.of(e.getValue()))) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue))); + verify(bigtable, times(1)).listTables(); + for (String tableName : tablesAndColumnFamilies.keySet()) { + verify(bigtable, times(1)).getTableAsync(tableName); + } + // One for listTables() and one for each table lookup. + int expectedBigtableCalls = tablesAndColumnFamilies.size() + 1; + assertTotalNumberOfInvocations(bigtable, expectedBigtableCalls); + + reset(bigtable); + verifyNoInteractions(bigtable); + Map input = + generateInput(new ArrayList<>(tablesAndColumnFamilies.entrySet())); + ResourceCreationResult result = bigtableSchemaManager.ensureTablesExist(input); + assertTrue(result.getBigtableErrors().isEmpty()); + assertTrue(result.getDataErrors().isEmpty()); + } + + @Test + public void testTableColumnFamiliesCachePopulationErrors() { + doThrow(ApiExceptionFactory.create()).when(bigtable).listTables(); + assertThrows( + ConnectException.class, + () -> bigtableSchemaManager.refreshTableColumnFamiliesCache(Collections.emptySet())); + verify(bigtable, times(1)).listTables(); + reset(bigtable); + + String successTable = "table1"; + String errorTable = "table2"; + List allTables = List.of(successTable, errorTable); + + doReturn(allTables).when(bigtable).listTables(); + mockGetTableSuccess(bigtable, successTable, Collections.emptySet()); + // We simulate an error due to e.g., deletion of the table by another user. + doReturn(failedApiFuture(ApiExceptionFactory.create())) + .when(bigtable) + .getTableAsync(errorTable); + bigtableSchemaManager.refreshTableColumnFamiliesCache(new HashSet<>(allTables)); + assertEquals(Set.of(successTable), bigtableSchemaManager.getCache().keySet()); + verify(bigtable, times(1)).listTables(); + verify(bigtable, times(1)).getTableAsync(successTable); + verify(bigtable, times(1)).getTableAsync(errorTable); + assertTotalNumberOfInvocations(bigtable, 3); + } + + @Test + public void testEnsureTablesExistAllExisted() { + // Prepopulate the cache. 
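+    // Once every target table is in the cache, ensureTablesExist() should not hit the Admin API
+    // at all; the invocation count of 0 asserted at the end of this test verifies that.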
+ List tables = List.of("table1", "table2"); + doReturn(tables).when(bigtable).listTables(); + bigtableSchemaManager.refreshTableNamesCache(); + reset(bigtable); + + Map ensureTablesExistInput = + generateInput( + List.of( + new AbstractMap.SimpleImmutableEntry<>(tables.get(0), Set.of("missingCF")), + new AbstractMap.SimpleImmutableEntry<>(tables.get(0), Set.of("missingCF")), + new AbstractMap.SimpleImmutableEntry<>(tables.get(1), Set.of("missingCF")), + new AbstractMap.SimpleImmutableEntry<>(tables.get(1), Set.of("missingCF")))); + ResourceCreationResult result = bigtableSchemaManager.ensureTablesExist(ensureTablesExistInput); + assertTrue(result.getBigtableErrors().isEmpty()); + assertTrue(result.getDataErrors().isEmpty()); + assertTotalNumberOfInvocations(bigtable, 0); + } + + @Test + public void testEnsureTablesExistAllCreatedSuccessfully() { + List tables = List.of("table1", "table2"); + // We call listTables() only once, after sending all the create requests. In this case all the + // requests were successful. + doReturn(tables).when(bigtable).listTables(); + for (String table : tables) { + mockCreateTableSuccess(bigtable, table, Collections.emptySet()); + } + + assertTrue(bigtableSchemaManager.getCache().isEmpty()); + Map ensureTablesExistInput = + generateInput( + List.of( + new AbstractMap.SimpleImmutableEntry<>(tables.get(0), Set.of("missingCF")), + new AbstractMap.SimpleImmutableEntry<>(tables.get(0), Set.of("missingCF")), + new AbstractMap.SimpleImmutableEntry<>(tables.get(1), Set.of("missingCF")), + new AbstractMap.SimpleImmutableEntry<>(tables.get(1), Set.of("missingCF")))); + ResourceCreationResult result = bigtableSchemaManager.ensureTablesExist(ensureTablesExistInput); + assertTrue(result.getBigtableErrors().isEmpty()); + assertTrue(result.getDataErrors().isEmpty()); + for (String table : tables) { + assertTrue(bigtableSchemaManager.getCache().containsKey(table)); + verify(bigtable, times(1)) + .createTableAsync(argThat(ctr -> createTableMockRefersTable(table, ctr))); + } + + // One for each table creation and one for result check. + int expectedBigtableCalls = tables.size() + 1; + assertTotalNumberOfInvocations(bigtable, expectedBigtableCalls); + } + + @Test + public void testEnsureTablesExistSomeCreatedSuccessfullySomeErrorsDueToRaces() { + List tables = List.of("table1", "table2"); + // We call listTables() only once, after sending all the create requests. In this case some + // requests failed since another thread concurrently created one of these tables. 
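+    // A failed createTableAsync() call is therefore not reported as an error on its own: the
+    // manager re-checks via listTables() afterwards, and since the "failed" table exists by then,
+    // all records are expected to succeed.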
+ doReturn(tables).when(bigtable).listTables(); + String tableWhoseCreationFailed = tables.get(1); + for (String table : tables) { + if (!table.equals(tableWhoseCreationFailed)) { + mockCreateTableSuccess(bigtable, table, Collections.emptySet()); + } + } + doReturn(failedApiFuture(ApiExceptionFactory.create())) + .when(bigtable) + .createTableAsync( + argThat(ctr -> createTableMockRefersTable(tableWhoseCreationFailed, ctr))); + + assertTrue(bigtableSchemaManager.getCache().isEmpty()); + Map ensureTablesExistInput = + generateInput( + List.of( + new AbstractMap.SimpleImmutableEntry<>(tables.get(0), Set.of("missingCF")), + new AbstractMap.SimpleImmutableEntry<>(tables.get(0), Set.of("missingCF")), + new AbstractMap.SimpleImmutableEntry<>(tables.get(1), Set.of("missingCF")), + new AbstractMap.SimpleImmutableEntry<>(tables.get(1), Set.of("missingCF")))); + ResourceCreationResult result = bigtableSchemaManager.ensureTablesExist(ensureTablesExistInput); + assertTrue(result.getBigtableErrors().isEmpty()); + assertTrue(result.getDataErrors().isEmpty()); + for (String table : tables) { + assertTrue(bigtableSchemaManager.getCache().containsKey(table)); + verify(bigtable, times(1)) + .createTableAsync(argThat(ctr -> createTableMockRefersTable(table, ctr))); + } + + // One for each table creation and one for result check. + int expectedBigtableCalls = tables.size() + 1; + assertTotalNumberOfInvocations(bigtable, expectedBigtableCalls); + } + + @Test + public void testEnsureTablesExistSomeCreatedSuccessfullySomeErrors() { + String successfulTable = "table1"; + String bigtableErrorTable = "table2"; + String dataErrorTable = "table3"; + Set columnFamilies = Set.of("cf1"); + + doReturn(List.of(successfulTable)).when(bigtable).listTables(); + mockCreateTableSuccess(bigtable, successfulTable, columnFamilies); + doReturn(failedApiFuture(ApiExceptionFactory.create(Status.Code.RESOURCE_EXHAUSTED))) + .when(bigtable) + .createTableAsync(argThat(ctr -> createTableMockRefersTable(bigtableErrorTable, ctr))); + doReturn(failedApiFuture(ApiExceptionFactory.create(Status.Code.INVALID_ARGUMENT))) + .when(bigtable) + .createTableAsync(argThat(ctr -> createTableMockRefersTable(dataErrorTable, ctr))); + + assertTrue(bigtableSchemaManager.getCache().isEmpty()); + Map ensureTablesExistInput = + generateInput( + List.of( + new AbstractMap.SimpleImmutableEntry<>(successfulTable, columnFamilies), + new AbstractMap.SimpleImmutableEntry<>(successfulTable, columnFamilies), + new AbstractMap.SimpleImmutableEntry<>(bigtableErrorTable, columnFamilies), + new AbstractMap.SimpleImmutableEntry<>(bigtableErrorTable, columnFamilies), + new AbstractMap.SimpleImmutableEntry<>(dataErrorTable, columnFamilies), + new AbstractMap.SimpleImmutableEntry<>(dataErrorTable, columnFamilies))); + ResourceCreationResult result = bigtableSchemaManager.ensureTablesExist(ensureTablesExistInput); + Set bigtableErrors = result.getBigtableErrors(); + Set dataErrors = result.getDataErrors(); + assertEquals( + ensureTablesExistInput.entrySet().stream() + .filter(e -> e.getValue().getTargetTable().equals(bigtableErrorTable)) + .map(Map.Entry::getKey) + .collect(Collectors.toSet()), + bigtableErrors); + assertEquals( + ensureTablesExistInput.entrySet().stream() + .filter(e -> e.getValue().getTargetTable().equals(dataErrorTable)) + .map(Map.Entry::getKey) + .collect(Collectors.toSet()), + dataErrors); + Map>> cache = bigtableSchemaManager.getCache(); + assertTrue(cache.containsKey(successfulTable)); + verify(bigtable, times(1)) + .createTableAsync(argThat(ctr -> 
createTableMockRefersTable(successfulTable, ctr))); + assertFalse(cache.containsKey(bigtableErrorTable)); + verify(bigtable, times(1)) + .createTableAsync(argThat(ctr -> createTableMockRefersTable(bigtableErrorTable, ctr))); + assertFalse(cache.containsKey(dataErrorTable)); + verify(bigtable, times(1)) + .createTableAsync(argThat(ctr -> createTableMockRefersTable(dataErrorTable, ctr))); + + // One for each table creation and one for result check. + int expectedBigtableCalls = 4; + assertTotalNumberOfInvocations(bigtable, expectedBigtableCalls); + } + + @Test + public void testEnsureTablesExistConcurrentDeletion() { + String createdTable = "table1"; + String createdAndThenConcurrentlyDeletedTable = "table2"; + Set columnFamilies = Set.of("cf1"); + + // Note that only a single table is returned - we simulate concurrent deletion of the other + // table. + doAnswer(ignored -> List.of(createdTable)).when(bigtable).listTables(); + mockCreateTableSuccess(bigtable, createdTable, columnFamilies); + mockCreateTableSuccess(bigtable, createdAndThenConcurrentlyDeletedTable, columnFamilies); + + assertTrue(bigtableSchemaManager.getCache().isEmpty()); + Map ensureTablesExistInput = + generateInput( + List.of( + new AbstractMap.SimpleImmutableEntry<>(createdTable, columnFamilies), + new AbstractMap.SimpleImmutableEntry<>(createdTable, columnFamilies), + new AbstractMap.SimpleImmutableEntry<>( + createdAndThenConcurrentlyDeletedTable, columnFamilies), + new AbstractMap.SimpleImmutableEntry<>( + createdAndThenConcurrentlyDeletedTable, columnFamilies))); + ResourceCreationResult result = bigtableSchemaManager.ensureTablesExist(ensureTablesExistInput); + assertTrue(result.getDataErrors().isEmpty()); + Set missingTables = result.getBigtableErrors(); + assertEquals( + ensureTablesExistInput.entrySet().stream() + .filter( + e -> e.getValue().getTargetTable().equals(createdAndThenConcurrentlyDeletedTable)) + .map(Map.Entry::getKey) + .collect(Collectors.toSet()), + missingTables); + Map>> cache = bigtableSchemaManager.getCache(); + assertTrue(cache.containsKey(createdTable)); + verify(bigtable, times(1)) + .createTableAsync(argThat(ctr -> createTableMockRefersTable(createdTable, ctr))); + assertFalse(cache.containsKey(createdAndThenConcurrentlyDeletedTable)); + verify(bigtable, times(1)) + .createTableAsync( + argThat( + ctr -> createTableMockRefersTable(createdAndThenConcurrentlyDeletedTable, ctr))); + + // One for each table creation and one for result check. 
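+    // Here that means two createTableAsync() calls plus one listTables() call.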
+ int expectedBigtableCalls = 3; + assertTotalNumberOfInvocations(bigtable, expectedBigtableCalls); + } + + @Test + public void testEnsureColumnFamiliesExistAllExisted() { + Map> tablesAndColumnFamilies = + Map.of( + "table1", Set.of("cf1", "cf2"), + "table2", Set.of("cf3", "cf4")); + doReturn(new ArrayList<>(tablesAndColumnFamilies.keySet())).when(bigtable).listTables(); + for (Map.Entry> entry : tablesAndColumnFamilies.entrySet()) { + mockGetTableSuccess(bigtable, entry.getKey(), entry.getValue()); + } + Set refreshedTables = tablesAndColumnFamilies.keySet(); + bigtableSchemaManager.refreshTableColumnFamiliesCache(refreshedTables); + reset(bigtable); + verifyNoInteractions(bigtable); + + Map ensureColumnFamiliesExistInput = + generateInput(new ArrayList<>(tablesAndColumnFamilies.entrySet())); + ResourceCreationResult result = + bigtableSchemaManager.ensureColumnFamiliesExist(ensureColumnFamiliesExistInput); + assertTrue(result.getDataErrors().isEmpty()); + assertTrue(result.getBigtableErrors().isEmpty()); + } + + @Test + public void testEnsureColumnFamiliesExistAllCreatedSuccessfully() { + Map> tablesAndColumnFamilies = + Map.of( + "table1", Set.of("cf1", "cf2"), + "table2", Set.of("cf3", "cf4")); + doReturn(new ArrayList<>(tablesAndColumnFamilies.keySet())).when(bigtable).listTables(); + for (Map.Entry> entry : tablesAndColumnFamilies.entrySet()) { + mockCreateColumnFamilySuccess(bigtable, entry.getKey(), entry.getValue()); + mockGetTableSuccess(bigtable, entry.getKey(), entry.getValue()); + } + Map ensureColumnFamiliesExistInput = + generateInput(new ArrayList<>(tablesAndColumnFamilies.entrySet())); + + ResourceCreationResult result = + bigtableSchemaManager.ensureColumnFamiliesExist(ensureColumnFamiliesExistInput); + assertTrue(result.getDataErrors().isEmpty()); + assertTrue(result.getBigtableErrors().isEmpty()); + for (Map.Entry> entry : tablesAndColumnFamilies.entrySet()) { + String tableName = entry.getKey(); + for (String columnFamily : entry.getValue()) { + verify(bigtable, times(1)) + .modifyFamiliesAsync( + argThat( + mcfr -> + createColumnFamilyMockRefersTableAndColumnFamily( + tableName, columnFamily, mcfr))); + } + verify(bigtable, times(1)).getTableAsync(tableName); + } + int expectedBigtableInteractions = + 1 // listTables() + + tablesAndColumnFamilies.values().stream() + .mapToInt(Set::size) + .sum() // modifyColumnFamily() + + tablesAndColumnFamilies.keySet().size(); // getTable() + assertTotalNumberOfInvocations(bigtable, expectedBigtableInteractions); + } + + @Test + public void + testEnsureColumnFamiliesExistSomeCreatedSuccessfullySomeErrorsDueToRacesOrInvalidRequests() { + String successTable = "table1"; + String bigtableErrorTable = "table2"; + String dataErrorTable = "table3"; + String invalidArgumentColumnFamilyName = "INVALID_ARGUMENT_COLUMN_FAMILY_NAME"; + Map> tablesAndColumnFamilies = + Map.of( + successTable, Set.of("cf1", "cf2"), + bigtableErrorTable, Set.of("cf3", "cf4"), + dataErrorTable, Set.of("cf5", invalidArgumentColumnFamilyName)); + doReturn(new ArrayList<>(tablesAndColumnFamilies.keySet())).when(bigtable).listTables(); + for (Map.Entry> entry : tablesAndColumnFamilies.entrySet()) { + String table = entry.getKey(); + for (String columnFamily : entry.getValue()) { + if (table.equals(bigtableErrorTable)) { + doReturn(failedApiFuture(ApiExceptionFactory.create(Status.Code.RESOURCE_EXHAUSTED))) + .when(bigtable) + .modifyFamiliesAsync( + argThat( + mcfr -> + createColumnFamilyMockRefersTableAndColumnFamily( + table, columnFamily, mcfr))); + } else if 
(table.equals(dataErrorTable)) { + doReturn(failedApiFuture(ApiExceptionFactory.create(Status.Code.INVALID_ARGUMENT))) + .when(bigtable) + .modifyFamiliesAsync( + argThat( + mcfr -> + createColumnFamilyMockRefersTableAndColumnFamily( + table, columnFamily, mcfr))); + } else { + mockCreateColumnFamilySuccess(bigtable, entry.getKey(), entry.getValue()); + } + } + Set columnFamilies = new HashSet<>(entry.getValue()); + columnFamilies.remove(invalidArgumentColumnFamilyName); + mockGetTableSuccess(bigtable, table, columnFamilies); + } + Map ensureColumnFamiliesExistInput = + generateInput(new ArrayList<>(tablesAndColumnFamilies.entrySet())); + ResourceCreationResult result = + bigtableSchemaManager.ensureColumnFamiliesExist(ensureColumnFamiliesExistInput); + assertTrue(result.getBigtableErrors().isEmpty()); + Set missingColumnFamilies = result.getDataErrors(); + assertEquals( + ensureColumnFamiliesExistInput.entrySet().stream() + .filter(e -> e.getValue().getTargetTable().equals(dataErrorTable)) + .map(Map.Entry::getKey) + .collect(Collectors.toSet()), + missingColumnFamilies); + + for (Map.Entry> entry : tablesAndColumnFamilies.entrySet()) { + String tableName = entry.getKey(); + for (String columnFamily : entry.getValue()) { + verify(bigtable, times(1)) + .modifyFamiliesAsync( + argThat( + mcfr -> + createColumnFamilyMockRefersTableAndColumnFamily( + tableName, columnFamily, mcfr))); + } + verify(bigtable, times(1)).getTableAsync(tableName); + } + int expectedBigtableInteractions = + 1 // listTables() + + tablesAndColumnFamilies.values().stream() + .mapToInt(Set::size) + .sum() // modifyColumnFamily() + + tablesAndColumnFamilies.keySet().size(); // getTable() + assertTotalNumberOfInvocations(bigtable, expectedBigtableInteractions); + } + + @Test + public void testEnsureColumnFamiliesExistSomeSomeErrorsDueToConcurrentColumnFamilyDeletion() { + String successTable = "table1"; + String errorTable = "table2"; + Map> tablesAndColumnFamilies = + Map.of( + successTable, Set.of("cf1", "cf2"), + errorTable, Set.of("cf3", "cf4")); + doReturn(new ArrayList<>(tablesAndColumnFamilies.keySet())).when(bigtable).listTables(); + for (Map.Entry> entry : tablesAndColumnFamilies.entrySet()) { + String table = entry.getKey(); + mockCreateColumnFamilySuccess(bigtable, table, entry.getValue()); + if (table.equals(errorTable)) { + doReturn(failedApiFuture(ApiExceptionFactory.create())).when(bigtable).getTableAsync(table); + } else { + mockGetTableSuccess(bigtable, table, entry.getValue()); + } + } + Map ensureColumnFamiliesExistInput = + generateInput(new ArrayList<>(tablesAndColumnFamilies.entrySet())); + ResourceCreationResult result = + bigtableSchemaManager.ensureColumnFamiliesExist(ensureColumnFamiliesExistInput); + assertTrue(result.getDataErrors().isEmpty()); + Set missingColumnFamilies = result.getBigtableErrors(); + assertEquals(1, missingColumnFamilies.size()); + assertEquals( + ensureColumnFamiliesExistInput.entrySet().stream() + .filter(e -> e.getValue().getTargetTable().equals(errorTable)) + .findFirst() + .get() + .getKey(), + missingColumnFamilies.stream().findFirst().get()); + + for (Map.Entry> entry : tablesAndColumnFamilies.entrySet()) { + String tableName = entry.getKey(); + for (String columnFamily : entry.getValue()) { + verify(bigtable, times(1)) + .modifyFamiliesAsync( + argThat( + mcfr -> + createColumnFamilyMockRefersTableAndColumnFamily( + tableName, columnFamily, mcfr))); + } + verify(bigtable, times(1)).getTableAsync(tableName); + } + int expectedBigtableInteractions = + 1 // listTables() 
+ + tablesAndColumnFamilies.values().stream() + .mapToInt(Set::size) + .sum() // modifyColumnFamily() + + tablesAndColumnFamilies.keySet().size(); // getTable() + assertTotalNumberOfInvocations(bigtable, expectedBigtableInteractions); + } + + @Test + public void testEnsureColumnFamiliesExistMissingTable() { + String successTable = "table1"; + String errorTable = "table2"; + Map> tablesAndColumnFamilies = + Map.of( + successTable, Set.of("cf1", "cf2"), + errorTable, Set.of("cf3", "cf4")); + doReturn(List.of(successTable)).when(bigtable).listTables(); + for (Map.Entry> entry : tablesAndColumnFamilies.entrySet()) { + String table = entry.getKey(); + mockGetTableSuccess(bigtable, table, entry.getValue()); + if (table.equals(errorTable)) { + doReturn(failedApiFuture(ApiExceptionFactory.create())) + .when(bigtable) + .modifyFamiliesAsync(argThat(mcfr -> createColumnFamilyMockRefersTable(table, mcfr))); + } else { + mockCreateColumnFamilySuccess(bigtable, table, entry.getValue()); + } + } + Map ensureColumnFamiliesExistInput = + generateInput(new ArrayList<>(tablesAndColumnFamilies.entrySet())); + ResourceCreationResult result = + bigtableSchemaManager.ensureColumnFamiliesExist(ensureColumnFamiliesExistInput); + assertTrue(result.getDataErrors().isEmpty()); + Set missingColumnFamilies = result.getBigtableErrors(); + assertEquals(1, missingColumnFamilies.size()); + assertEquals( + ensureColumnFamiliesExistInput.entrySet().stream() + .filter(e -> e.getValue().getTargetTable().equals(errorTable)) + .findFirst() + .get() + .getKey(), + missingColumnFamilies.stream().findFirst().get()); + + for (Map.Entry> entry : tablesAndColumnFamilies.entrySet()) { + String table = entry.getKey(); + for (String columnFamily : entry.getValue()) { + verify(bigtable, times(1)) + .modifyFamiliesAsync( + argThat( + mcfr -> + createColumnFamilyMockRefersTableAndColumnFamily( + table, columnFamily, mcfr))); + } + if (!table.equals(errorTable)) { + verify(bigtable, times(1)).getTableAsync(table); + } + } + int expectedBigtableInteractions = + 1 // listTables() + + tablesAndColumnFamilies.values().stream() + .mapToInt(Set::size) + .sum() // modifyColumnFamily() + + 1; // getTable() + assertTotalNumberOfInvocations(bigtable, expectedBigtableInteractions); + } + + @Test + public void testErrorsCreatingColumnFamilies() {} + + @Test + public void testAwaitResourceCreationAndHandleInvalidInputErrors() { + int uniqueKafkaOffset = 0; + SinkRecord ok1 = spoofSinkRecord("topic1", uniqueKafkaOffset++); + SinkRecord ok2 = spoofSinkRecord("topic2", uniqueKafkaOffset++); + SinkRecord dataError1 = spoofSinkRecord("topic3", uniqueKafkaOffset++); + SinkRecord dataError2 = spoofSinkRecord("topic4", uniqueKafkaOffset++); + SinkRecord bigtableError1 = spoofSinkRecord("topic5", uniqueKafkaOffset++); + SinkRecord bigtableError2 = spoofSinkRecord("topic6", uniqueKafkaOffset++); + + ResourceAndRecords ok = new ResourceAndRecords("ok", List.of(ok1, ok2)); + ResourceAndRecords dataError = + new ResourceAndRecords("data", List.of(dataError1, dataError2)); + ResourceAndRecords bigtableError = + new ResourceAndRecords("bigtable", List.of(bigtableError1, bigtableError2)); + + Map, ResourceAndRecords> input = + Map.of( + completedApiFuture(null), ok, + failedApiFuture(ApiExceptionFactory.create(Status.Code.INVALID_ARGUMENT)), dataError, + failedApiFuture(ApiExceptionFactory.create(Status.Code.RESOURCE_EXHAUSTED)), + bigtableError); + + Set dataErrors = + bigtableSchemaManager.awaitResourceCreationAndHandleInvalidInputErrors(input, "%s"); + 
assertEquals(new HashSet<>(dataError.getRecords()), dataErrors); + verify(bigtableSchemaManager.logger, times(1)) + .info(eq(bigtableError.getResource()), any(Throwable.class)); + } + + private static Map generateInput( + List>> records) { + int uniqueKafkaOffset = 1; + Map result = new HashMap<>(); + for (Map.Entry> record : records) { + SinkRecord sinkRecord = spoofSinkRecord("topic" + record.getKey(), uniqueKafkaOffset++); + MutationData mutationData = spoofSinkRecordOutput(record.getKey(), record.getValue()); + result.put(sinkRecord, mutationData); + } + return result; + } + + private static SinkRecord spoofSinkRecord(String topic, int uniqueKafkaOffset) { + return new SinkRecord(topic, 1, null, new Object(), null, new Object(), uniqueKafkaOffset); + } + + private static MutationData spoofSinkRecordOutput( + String targetTable, Set columnFamilies) { + return new MutationData(targetTable, null, null, columnFamilies); + } + + private boolean createTableMockRefersTable(String tableName, CreateTableRequest ctr) { + return tableName.equals(ctr.toProto("unused", "unused").getTableId()); + } + + private boolean createColumnFamilyMockRefersTable( + String tableName, ModifyColumnFamiliesRequest mcfr) { + // getName() returns whole table id comprising project ID, instance ID, table name, ... + return mcfr.toProto("unused", "unused").getName().endsWith("/" + tableName); + } + + private boolean createColumnFamilyMockRefersTableAndColumnFamily( + String tableName, String columnFamily, ModifyColumnFamiliesRequest mcfr) { + boolean refersTable = createColumnFamilyMockRefersTable(tableName, mcfr); + List modifications = + mcfr.toProto("unused", "unused").getModificationsList(); + return refersTable + && modifications.stream() + .filter( + com.google.bigtable.admin.v2.ModifyColumnFamiliesRequest.Modification::hasCreate) + .anyMatch(m -> columnFamily.equals(m.getId())); + } + + private void mockCreateTableSuccess( + BigtableTableAdminClient bigtable, String tableName, Set tableColumnFamilies) { + Table table = mockTable(tableName, tableColumnFamilies); + doAnswer(ignored -> completedApiFuture(table)) + .when(bigtable) + .createTableAsync(argThat(ctr -> createTableMockRefersTable(tableName, ctr))); + } + + private void mockCreateColumnFamilySuccess( + BigtableTableAdminClient bigtable, String tableName, Set tableColumnFamilies) { + Table table = mockTable(tableName, tableColumnFamilies); + doAnswer(ignored -> completedApiFuture(table)) + .when(bigtable) + .modifyFamiliesAsync(argThat(mcfr -> createColumnFamilyMockRefersTable(tableName, mcfr))); + } + + private void mockGetTableSuccess( + BigtableTableAdminClient bigtable, String tableName, Set tableColumnFamilies) { + Table table = mockTable(tableName, tableColumnFamilies); + doAnswer(ignored -> completedApiFuture(table)).when(bigtable).getTableAsync(tableName); + } + + private Table mockTable(String tableName, Set tableColumnFamilies) { + List columnFamilies = new ArrayList<>(); + for (String tableColumnFamily : tableColumnFamilies) { + ColumnFamily columnFamily = mock(ColumnFamily.class); + doReturn(tableColumnFamily).when(columnFamily).getId(); + columnFamilies.add(columnFamily); + } + Table table = mock(Table.class); + doReturn(tableName).when(table).getId(); + doReturn(columnFamilies).when(table).getColumnFamilies(); + return table; + } + + private static class TestBigtableSchemaManager extends BigtableSchemaManager { + public TestBigtableSchemaManager(BigtableTableAdminClient bigtable) { + super(bigtable); + this.logger = spy(this.logger); + } + + 
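+    // Exposes the manager's internal table -> column-families cache so tests can inspect it;
+    // the spied logger set up in the constructor above lets tests verify error logging.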
public Map>> getCache() { + return tableNameToColumnFamilies; + } + } +} diff --git a/google-cloud-bigtable-kafka-connect-sink/src/test/java/com/google/cloud/kafka/connect/bigtable/autocreate/SchemaApiExceptionsTest.java b/google-cloud-bigtable-kafka-connect-sink/src/test/java/com/google/cloud/kafka/connect/bigtable/autocreate/SchemaApiExceptionsTest.java new file mode 100644 index 000000000..b01595f18 --- /dev/null +++ b/google-cloud-bigtable-kafka-connect-sink/src/test/java/com/google/cloud/kafka/connect/bigtable/autocreate/SchemaApiExceptionsTest.java @@ -0,0 +1,143 @@ +/* + * Copyright 2025 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.google.cloud.kafka.connect.bigtable.autocreate; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import com.google.api.gax.grpc.GrpcStatusCode; +import com.google.api.gax.rpc.ApiException; +import com.google.api.gax.rpc.StatusCode; +import com.google.api.gax.rpc.StatusCode.Code; +import com.google.cloud.kafka.connect.bigtable.util.ApiExceptionFactory; +import io.grpc.Status; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ExecutionException; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +@RunWith(JUnit4.class) +public class SchemaApiExceptionsTest { + @Test + public void testIsStatusCodeCausedByInputError() { + for (Code causedByInputError : + List.of(Code.INVALID_ARGUMENT, Code.OUT_OF_RANGE, Code.FAILED_PRECONDITION)) { + assertTrue( + BigtableSchemaManager.SchemaApiExceptions.isStatusCodeCausedByInputError( + causedByInputError)); + } + + for (Code notCausedByInputError : + List.of(Code.NOT_FOUND, Code.RESOURCE_EXHAUSTED, Code.CANCELLED, Code.UNKNOWN)) { + assertFalse( + BigtableSchemaManager.SchemaApiExceptions.isStatusCodeCausedByInputError( + notCausedByInputError)); + } + } + + @Test + public void testExtractStatusCodeNonempty() { + StatusCode code = GrpcStatusCode.of(Status.Code.RESOURCE_EXHAUSTED); + ApiException apiException = ApiExceptionFactory.create(new Throwable(), code, true); + Throwable one = new Throwable(apiException); + Throwable two = new Throwable(one); + Throwable three = new Throwable(two); + for (Throwable t : List.of(apiException, one, two, three)) { + assertTrue( + BigtableSchemaManager.SchemaApiExceptions.maybeExtractBigtableStatusCode(t).isPresent()); + } + } + + @Test + public void testExtractGetsTheFirstStatusCode() { + StatusCode causeCode = GrpcStatusCode.of(Status.Code.RESOURCE_EXHAUSTED); + ApiException cause = ApiExceptionFactory.create(new Throwable(), causeCode, true); + StatusCode exceptionCode = GrpcStatusCode.of(Status.Code.INVALID_ARGUMENT); + ApiException exception = ApiExceptionFactory.create(cause, exceptionCode, false); + + assertEquals( + causeCode, + BigtableSchemaManager.SchemaApiExceptions.maybeExtractBigtableStatusCode(cause).get()); + assertEquals( + exceptionCode, + 
BigtableSchemaManager.SchemaApiExceptions.maybeExtractBigtableStatusCode(exception).get()); + } + + @Test + public void testExtractStatusCodeEmpty() { + Throwable one = new Throwable(); + Throwable two = new Throwable(one); + Throwable three = new Throwable(two); + + for (Throwable t : List.of(one, two, three)) { + assertTrue( + BigtableSchemaManager.SchemaApiExceptions.maybeExtractBigtableStatusCode(t).isEmpty()); + } + assertTrue( + BigtableSchemaManager.SchemaApiExceptions.maybeExtractBigtableStatusCode(null).isEmpty()); + } + + @Test + public void testIsCausedByInputError() { + assertFalse(BigtableSchemaManager.SchemaApiExceptions.isCausedByInputError(null)); + + ApiException inputErrorException = + ApiExceptionFactory.create( + new Throwable(), GrpcStatusCode.of(Status.Code.INVALID_ARGUMENT), false); + ApiException bigtableErrorException = + ApiExceptionFactory.create(new Throwable(), GrpcStatusCode.of(Status.Code.UNKNOWN), false); + + assertTrue(BigtableSchemaManager.SchemaApiExceptions.isCausedByInputError(inputErrorException)); + assertTrue( + BigtableSchemaManager.SchemaApiExceptions.isCausedByInputError( + new ExecutionException(inputErrorException))); + assertTrue( + BigtableSchemaManager.SchemaApiExceptions.isCausedByInputError( + new ExecutionException(new Throwable(inputErrorException)))); + + assertFalse( + BigtableSchemaManager.SchemaApiExceptions.isCausedByInputError(bigtableErrorException)); + assertFalse( + BigtableSchemaManager.SchemaApiExceptions.isCausedByInputError( + new ExecutionException(bigtableErrorException))); + assertFalse( + BigtableSchemaManager.SchemaApiExceptions.isCausedByInputError( + new ExecutionException(new Throwable(bigtableErrorException)))); + } + + @Test + public void testIsCausedByInputErrorIgnoresRetriableField() { + for (Map.Entry testCase : + Map.of( + Status.Code.INVALID_ARGUMENT, true, + Status.Code.RESOURCE_EXHAUSTED, false) + .entrySet()) { + Status.Code code = testCase.getKey(); + Boolean expectedResult = testCase.getValue(); + + for (Boolean retryableField : List.of(true, false)) { + ApiException e = + ApiExceptionFactory.create(new Throwable(), GrpcStatusCode.of(code), retryableField); + assertEquals( + expectedResult, BigtableSchemaManager.SchemaApiExceptions.isCausedByInputError(e)); + } + } + ; + } +} diff --git a/google-cloud-bigtable-kafka-connect-sink/src/test/java/com/google/cloud/kafka/connect/bigtable/config/BigtableSinkConfigTest.java b/google-cloud-bigtable-kafka-connect-sink/src/test/java/com/google/cloud/kafka/connect/bigtable/config/BigtableSinkConfigTest.java new file mode 100644 index 000000000..a2c462924 --- /dev/null +++ b/google-cloud-bigtable-kafka-connect-sink/src/test/java/com/google/cloud/kafka/connect/bigtable/config/BigtableSinkConfigTest.java @@ -0,0 +1,157 @@ +/* + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package com.google.cloud.kafka.connect.bigtable.config;
+
+import static com.google.cloud.kafka.connect.bigtable.config.BigtableSinkConfig.CONFIG_AUTO_CREATE_COLUMN_FAMILIES;
+import static com.google.cloud.kafka.connect.bigtable.config.BigtableSinkConfig.CONFIG_AUTO_CREATE_TABLES;
+import static com.google.cloud.kafka.connect.bigtable.config.BigtableSinkConfig.CONFIG_ERROR_MODE;
+import static com.google.cloud.kafka.connect.bigtable.config.BigtableSinkConfig.CONFIG_INSERT_MODE;
+import static com.google.cloud.kafka.connect.bigtable.config.BigtableSinkConfig.CONFIG_MAX_BATCH_SIZE;
+import static com.google.cloud.kafka.connect.bigtable.config.BigtableSinkConfig.CONFIG_TABLE_NAME_FORMAT;
+import static com.google.cloud.kafka.connect.bigtable.config.BigtableSinkConfig.CONFIG_VALUE_NULL_MODE;
+import static java.util.Collections.emptyList;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertThrows;
+import static org.junit.Assert.assertTrue;
+import static org.mockito.Mockito.any;
+import static org.mockito.Mockito.doReturn;
+import static org.mockito.Mockito.doThrow;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.spy;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
+
+import com.google.cloud.bigtable.admin.v2.BigtableTableAdminClient;
+import com.google.cloud.kafka.connect.bigtable.util.BasicPropertiesFactory;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import org.apache.kafka.common.config.ConfigException;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+@RunWith(JUnit4.class)
+public class BigtableSinkConfigTest {
+  public static boolean configIsValid(BigtableSinkConfig config) {
+    return BigtableSinkConfig.validate(config.originalsStrings(), false).configValues().stream()
+        .allMatch(v -> v.errorMessages().isEmpty());
+  }
+
+  @Test
+  public void testBasicSuccess() {
+    BigtableSinkConfig config = new BigtableSinkConfig(BasicPropertiesFactory.getSinkProps());
+    assertTrue(configIsValid(config));
+  }
+
+  @Test
+  public void testBasicValidationFailure() {
+    assertThrows(ConfigException.class, () -> new BigtableSinkConfig(new HashMap<>()));
+    for (String configName :
+        List.of(
+            CONFIG_TABLE_NAME_FORMAT,
+            CONFIG_AUTO_CREATE_TABLES,
+            CONFIG_AUTO_CREATE_COLUMN_FAMILIES)) {
+      Map<String, String> props = BasicPropertiesFactory.getSinkProps();
+      props.put(configName, null);
+      assertThrows(ConfigException.class, () -> new BigtableSinkConfig(props));
+    }
+    for (String configName :
+        List.of(CONFIG_INSERT_MODE, CONFIG_VALUE_NULL_MODE, CONFIG_ERROR_MODE)) {
+      Map<String, String> props = BasicPropertiesFactory.getSinkProps();
+      props.put(configName, "invalid");
+      assertThrows(ConfigException.class, () -> new BigtableSinkConfig(props));
+    }
+  }
+
+  @Test
+  public void testDefaults() {
+    BigtableSinkConfig config = new BigtableSinkConfig(BasicPropertiesFactory.getSinkProps());
+    assertEquals(config.getString(CONFIG_INSERT_MODE), InsertMode.INSERT.name());
+    assertEquals((long) config.getInt(CONFIG_MAX_BATCH_SIZE), 1);
+    assertEquals(config.getString(CONFIG_VALUE_NULL_MODE), NullValueMode.WRITE.name());
+  }
+
+  @Test
+  public void testMultipleValuesValidationInsert() {
+    Map<String, String> props = BasicPropertiesFactory.getSinkProps();
+    props.put(BigtableSinkConfig.CONFIG_INSERT_MODE, InsertMode.INSERT.name());
+    props.put(BigtableSinkConfig.CONFIG_MAX_BATCH_SIZE, "2");
+    BigtableSinkConfig config = new
BigtableSinkConfig(props); + assertFalse(configIsValid(config)); + } + + @Test + public void testMultipleValuesValidationCredentials() { + Map props = BasicPropertiesFactory.getSinkProps(); + props.put(BigtableSinkConfig.CONFIG_GCP_CREDENTIALS_JSON, "nonempty"); + props.put(BigtableSinkConfig.CONFIG_GCP_CREDENTIALS_PATH, "nonempty"); + BigtableSinkConfig config = new BigtableSinkConfig(props); + assertFalse(configIsValid(config)); + } + + @Test + public void testGetBigtableDataClient() { + BigtableSinkConfig config = new BigtableSinkConfig(BasicPropertiesFactory.getSinkProps()); + config.getBigtableDataClient(); + } + + @Test + public void testGetBigtableAdminClient() { + BigtableSinkConfig config = new BigtableSinkConfig(BasicPropertiesFactory.getSinkProps()); + config.getBigtableAdminClient(); + } + + @Test + public void testEnumCaseInsensitivity() { + Map props = BasicPropertiesFactory.getSinkProps(); + props.put(CONFIG_INSERT_MODE, "uPsErT"); + props.put(CONFIG_ERROR_MODE, "IGNORE"); + props.put(CONFIG_VALUE_NULL_MODE, "delete"); + BigtableSinkConfig config = new BigtableSinkConfig(props); + } + + @Test + public void testIsBigtableConfigurationValidBasicSuccess() { + Map props = BasicPropertiesFactory.getSinkProps(); + BigtableSinkConfig config = spy(new BigtableSinkConfig(props)); + BigtableTableAdminClient bigtable = mock(BigtableTableAdminClient.class); + doReturn(emptyList()).when(bigtable).listTables(); + doReturn(bigtable).when(config).getBigtableAdminClient(any()); + assertTrue(config.isBigtableConfigurationValid()); + verify(bigtable, times(1)).close(); + } + + @Test + public void testIsBigtableConfigurationValidClientConstructorError() { + Map props = BasicPropertiesFactory.getSinkProps(); + BigtableSinkConfig config = spy(new BigtableSinkConfig(props)); + doThrow(new RuntimeException()).when(config).getBigtableAdminClient(); + assertFalse(config.isBigtableConfigurationValid()); + } + + @Test + public void testIsBigtableConfigurationValidOperationError() { + Map props = BasicPropertiesFactory.getSinkProps(); + BigtableSinkConfig config = spy(new BigtableSinkConfig(props)); + BigtableTableAdminClient bigtable = mock(BigtableTableAdminClient.class); + doThrow(new RuntimeException()).when(bigtable).listTables(); + doReturn(bigtable).when(config).getBigtableAdminClient(any()); + assertFalse(config.isBigtableConfigurationValid()); + verify(bigtable, times(1)).close(); + } +} diff --git a/google-cloud-bigtable-kafka-connect-sink/src/test/java/com/google/cloud/kafka/connect/bigtable/integration/BaseIT.java b/google-cloud-bigtable-kafka-connect-sink/src/test/java/com/google/cloud/kafka/connect/bigtable/integration/BaseIT.java new file mode 100644 index 000000000..aeec5a650 --- /dev/null +++ b/google-cloud-bigtable-kafka-connect-sink/src/test/java/com/google/cloud/kafka/connect/bigtable/integration/BaseIT.java @@ -0,0 +1,142 @@ +/* + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.google.cloud.kafka.connect.bigtable.integration; + +import static com.google.cloud.kafka.connect.bigtable.config.BigtableSinkConfig.CONFIG_BIGTABLE_INSTANCE_ID; +import static com.google.cloud.kafka.connect.bigtable.config.BigtableSinkConfig.CONFIG_GCP_PROJECT_ID; +import static org.apache.kafka.connect.runtime.ConnectorConfig.CONNECTOR_CLASS_CONFIG; +import static org.apache.kafka.connect.runtime.ConnectorConfig.TASKS_MAX_CONFIG; +import static org.apache.kafka.connect.runtime.WorkerConfig.KEY_CONVERTER_CLASS_CONFIG; +import static org.apache.kafka.connect.runtime.WorkerConfig.VALUE_CONVERTER_CLASS_CONFIG; + +import com.google.cloud.bigtable.data.v2.BigtableDataClient; +import com.google.cloud.bigtable.data.v2.models.Query; +import com.google.cloud.bigtable.data.v2.models.Row; +import com.google.cloud.kafka.connect.bigtable.BigtableSinkConnector; +import com.google.cloud.kafka.connect.bigtable.config.BigtableSinkConfig; +import com.google.cloud.kafka.connect.bigtable.util.TestId; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import java.util.stream.Collectors; +import org.apache.kafka.clients.admin.Admin; +import org.apache.kafka.common.utils.Utils; +import org.apache.kafka.connect.runtime.WorkerConfig; +import org.apache.kafka.connect.runtime.isolation.PluginDiscoveryMode; +import org.apache.kafka.connect.storage.StringConverter; +import org.apache.kafka.connect.util.clusters.EmbeddedConnectCluster; +import org.junit.After; +import org.junit.Before; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public abstract class BaseIT { + private final Logger logger = LoggerFactory.getLogger(BaseIT.class); + protected EmbeddedConnectCluster connect; + private Admin kafkaAdminClient; + protected int numWorkers = 1; + protected int numBrokers = 1; + protected int numTasks = 1; + + @Before + public void setUp() { + startConnect(); + } + + @After + public void tearDown() { + stopConnect(); + } + + protected void startConnect() { + logger.info("Starting embedded Kafka Connect cluster..."); + Map workerProps = new HashMap<>(); + workerProps.put(WorkerConfig.OFFSET_COMMIT_INTERVAL_MS_CONFIG, Long.toString(10000)); + workerProps.put(WorkerConfig.PLUGIN_DISCOVERY_CONFIG, PluginDiscoveryMode.HYBRID_WARN.name()); + + Properties brokerProps = new Properties(); + brokerProps.put("message.max.bytes", 10 * 1024 * 1024); + brokerProps.put("auto.create.topics.enable", "false"); + brokerProps.put("delete.topic.enable", "true"); + connect = + new EmbeddedConnectCluster.Builder() + .name("kcbt-connect-cluster-" + getTestClassId()) + .numWorkers(numWorkers) + .numBrokers(numBrokers) + .brokerProps(brokerProps) + .workerProps(workerProps) + .build(); + + // Start the clusters + connect.start(); + try { + connect + .assertions() + .assertAtLeastNumWorkersAreUp(1, "Initial group of workers did not start in time."); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + kafkaAdminClient = connect.kafka().createAdminClient(); + } + + protected void stopConnect() { + logger.info("Stopping embedded Kafka Connect cluster..."); + if (kafkaAdminClient != null) { + Utils.closeQuietly(kafkaAdminClient, "Admin client for embedded Kafka cluster"); + kafkaAdminClient = null; + } + + // Stop all Connect, Kafka and Zk threads. 
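+ // Utils.closeQuietly swallows any exception thrown while shutting the embedded cluster
+ // down, so a failing teardown cannot mask the actual test result.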
+ if (connect != null) { + Utils.closeQuietly(connect::stop, "Embedded Connect, Kafka, and Zookeeper clusters"); + connect = null; + } + } + + protected Map baseConnectorProps() { + Map result = new HashMap<>(); + + result.put(CONNECTOR_CLASS_CONFIG, BigtableSinkConnector.class.getCanonicalName()); + result.put(TASKS_MAX_CONFIG, Integer.toString(numTasks)); + result.put(KEY_CONVERTER_CLASS_CONFIG, StringConverter.class.getName()); + result.put(VALUE_CONVERTER_CLASS_CONFIG, StringConverter.class.getName()); + + // TODO: get it from environment variables after migrating to kokoro. + result.put(CONFIG_GCP_PROJECT_ID, "todotodo"); + result.put(CONFIG_BIGTABLE_INSTANCE_ID, "todotodo"); + + return result; + } + + protected BigtableDataClient getBigtableDataClient() { + BigtableSinkConfig config = new BigtableSinkConfig(baseConnectorProps()); + return config.getBigtableDataClient(); + } + + protected List readAllRows(BigtableDataClient bigtable, String table) { + Query query = Query.create(table); + return bigtable.readRows(query).stream().collect(Collectors.toList()); + } + + protected String getTestClassId() { + return TestId.getTestClassId(this.getClass()); + } + + protected String getTestCaseId() { + return TestId.getTestCaseId(this.getClass()); + } +} diff --git a/google-cloud-bigtable-kafka-connect-sink/src/test/java/com/google/cloud/kafka/connect/bigtable/integration/BasicIT.java b/google-cloud-bigtable-kafka-connect-sink/src/test/java/com/google/cloud/kafka/connect/bigtable/integration/BasicIT.java new file mode 100644 index 000000000..0fd454f5d --- /dev/null +++ b/google-cloud-bigtable-kafka-connect-sink/src/test/java/com/google/cloud/kafka/connect/bigtable/integration/BasicIT.java @@ -0,0 +1,81 @@ +/* + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.google.cloud.kafka.connect.bigtable.integration; + +import static org.apache.kafka.test.TestUtils.waitForCondition; + +import com.google.cloud.bigtable.data.v2.BigtableDataClient; +import com.google.cloud.bigtable.data.v2.models.Row; +import com.google.cloud.bigtable.data.v2.models.RowCell; +import com.google.cloud.kafka.connect.bigtable.config.BigtableSinkConfig; +import com.google.cloud.kafka.connect.bigtable.config.InsertMode; +import com.google.protobuf.ByteString; +import java.nio.charset.StandardCharsets; +import java.util.List; +import java.util.Map; +import org.apache.kafka.connect.runtime.SinkConnectorConfig; +import org.apache.kafka.test.TestCondition; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +@RunWith(JUnit4.class) +public class BasicIT extends BaseIT { + private static final int TASK_CONSUME_TIMEOUT_MS = 10000; + + @Test + public void testSimpleWrite() throws InterruptedException { + BigtableDataClient bigtable = getBigtableDataClient(); + String topic = getTestCaseId(); + String connectorName = "connector-" + topic; + connect.kafka().createTopic(topic, numTasks); + Map props = baseConnectorProps(); + props.put(SinkConnectorConfig.TOPICS_CONFIG, topic); + props.put(BigtableSinkConfig.CONFIG_AUTO_CREATE_TABLES, "true"); + props.put(BigtableSinkConfig.CONFIG_AUTO_CREATE_COLUMN_FAMILIES, "true"); + props.put(BigtableSinkConfig.CONFIG_INSERT_MODE, InsertMode.UPSERT.name()); + connect.configureConnector(connectorName, props); + connect + .assertions() + .assertConnectorAndAtLeastNumTasksAreRunning( + connectorName, numTasks, "Connector start timeout"); + + int numberOfRecords = 1; + String key = "key"; + String value = "value"; + connect.kafka().produce(topic, key, value); + + TestCondition testCondition = + () -> { + List allRows = readAllRows(bigtable, topic); + if (numberOfRecords != allRows.size()) { + return false; + } + Row row = allRows.get(0); + if (!ByteString.copyFrom(key.getBytes(StandardCharsets.UTF_8)).equals(row.getKey())) { + return false; + } + List rowCells = row.getCells("default", "KAFKA_VALUE"); + if (numberOfRecords != rowCells.size()) { + return false; + } + return ByteString.copyFrom(value.getBytes(StandardCharsets.UTF_8)) + .equals(rowCells.get(0).getValue()); + }; + waitForCondition( + testCondition, TASK_CONSUME_TIMEOUT_MS, "Correct results not received in time"); + } +} diff --git a/google-cloud-bigtable-kafka-connect-sink/src/test/java/com/google/cloud/kafka/connect/bigtable/integration/ConfigIT.java b/google-cloud-bigtable-kafka-connect-sink/src/test/java/com/google/cloud/kafka/connect/bigtable/integration/ConfigIT.java new file mode 100644 index 000000000..ca616fa49 --- /dev/null +++ b/google-cloud-bigtable-kafka-connect-sink/src/test/java/com/google/cloud/kafka/connect/bigtable/integration/ConfigIT.java @@ -0,0 +1,46 @@ +/* + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.google.cloud.kafka.connect.bigtable.integration; + +import java.util.Map; +import org.apache.kafka.connect.runtime.SinkConnectorConfig; +import org.apache.kafka.connect.runtime.rest.errors.ConnectRestException; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +@RunWith(JUnit4.class) +public class ConfigIT extends BaseIT { + @Test + public void testBaseSuccess() throws InterruptedException { + String topic = getTestCaseId(); + String connectorName = "connector-" + topic; + connect.kafka().createTopic(topic, numTasks); + Map props = baseConnectorProps(); + props.put(SinkConnectorConfig.TOPICS_CONFIG, topic); + connect.configureConnector(connectorName, props); + connect + .assertions() + .assertConnectorAndAtLeastNumTasksAreRunning( + connectorName, numTasks, "Connector start timeout"); + } + + @Test(expected = ConnectRestException.class) + public void testUnconfiguredError() { + Map props = baseConnectorProps(); + connect.configureConnector(getTestCaseId(), props); + } +} diff --git a/google-cloud-bigtable-kafka-connect-sink/src/test/java/com/google/cloud/kafka/connect/bigtable/integration/VersionIT.java b/google-cloud-bigtable-kafka-connect-sink/src/test/java/com/google/cloud/kafka/connect/bigtable/integration/VersionIT.java new file mode 100644 index 000000000..5b51056b0 --- /dev/null +++ b/google-cloud-bigtable-kafka-connect-sink/src/test/java/com/google/cloud/kafka/connect/bigtable/integration/VersionIT.java @@ -0,0 +1,53 @@ +/* + * Copyright 2025 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.google.cloud.kafka.connect.bigtable.integration; + +import static org.junit.Assert.assertNotEquals; + +import com.fasterxml.jackson.databind.MapperFeature; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.cloud.kafka.connect.bigtable.BigtableSinkConnector; +import com.google.cloud.kafka.connect.bigtable.version.PackageMetadata; +import java.io.IOException; +import java.net.URI; +import java.net.http.HttpClient; +import java.net.http.HttpRequest; +import java.net.http.HttpResponse; +import java.util.Arrays; +import org.apache.kafka.connect.runtime.rest.entities.PluginInfo; +import org.junit.Test; + +public class VersionIT extends BaseIT { + @Test + public void testVersionIsSet() throws IOException, InterruptedException { + String url = connect.endpointForResource("connector-plugins"); + HttpClient http = HttpClient.newHttpClient(); + HttpRequest req = HttpRequest.newBuilder(URI.create(url)).GET().build(); + HttpResponse response = http.send(req, HttpResponse.BodyHandlers.ofString()); + ObjectMapper mapper = + new ObjectMapper() + .enable(MapperFeature.ACCEPT_CASE_INSENSITIVE_ENUMS) + .enable(MapperFeature.ACCEPT_CASE_INSENSITIVE_VALUES) + .enable(MapperFeature.ACCEPT_CASE_INSENSITIVE_PROPERTIES); + PluginInfo[] pluginInfos = mapper.readValue(response.body(), PluginInfo[].class); + PluginInfo pluginInfo = + Arrays.stream(pluginInfos) + .filter(i -> i.className().equals(BigtableSinkConnector.class.getCanonicalName())) + .findFirst() + .get(); + assertNotEquals(PackageMetadata.UNKNOWN_VERSION, pluginInfo.version()); + } +} diff --git a/google-cloud-bigtable-kafka-connect-sink/src/test/java/com/google/cloud/kafka/connect/bigtable/mapping/KeyMapperTest.java b/google-cloud-bigtable-kafka-connect-sink/src/test/java/com/google/cloud/kafka/connect/bigtable/mapping/KeyMapperTest.java new file mode 100644 index 000000000..01d26ce3a --- /dev/null +++ b/google-cloud-bigtable-kafka-connect-sink/src/test/java/com/google/cloud/kafka/connect/bigtable/mapping/KeyMapperTest.java @@ -0,0 +1,835 @@ +/* + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.google.cloud.kafka.connect.bigtable.mapping; + +/* + * This software contains code derived from the BigQuery Connector for Apache Kafka, + * Copyright Aiven Oy, which in turn contains code derived from the Confluent BigQuery + * Kafka Connector, Copyright Confluent, Inc, which in turn contains code derived from + * the WePay BigQuery Kafka Connector, Copyright WePay, Inc. 
+ */ + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertThrows; +import static org.junit.Assert.assertTrue; + +import com.google.cloud.kafka.connect.bigtable.util.JsonConverterFactory; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.math.BigDecimal; +import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import java.util.Date; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import org.apache.kafka.connect.data.Schema; +import org.apache.kafka.connect.data.SchemaAndValue; +import org.apache.kafka.connect.data.SchemaBuilder; +import org.apache.kafka.connect.data.Struct; +import org.apache.kafka.connect.errors.DataException; +import org.apache.kafka.connect.json.JsonConverter; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +// Note that in many tests when we compare with toString() result, we most probably compare our and +// Confluent sink's implementations. +@RunWith(JUnit4.class) +public class KeyMapperTest { + private static String DELIMITER = "##"; + + @Test + public void testBoolean() { + final String fieldName = "Boolean"; + final Boolean fieldValue = true; + + Schema kafkaConnectSchema = + SchemaBuilder.struct().field(fieldName, Schema.BOOLEAN_SCHEMA).build(); + + Struct kafkaConnectStruct = new Struct(kafkaConnectSchema); + kafkaConnectStruct.put(fieldName, fieldValue); + + assertTrue( + Arrays.equals( + calculateKey(List.of(), DELIMITER, fieldValue), + fieldValue.toString().getBytes(StandardCharsets.UTF_8))); + assertTrue( + Arrays.equals( + calculateKey(List.of(), DELIMITER, kafkaConnectStruct), + fieldValue.toString().getBytes(StandardCharsets.UTF_8))); + assertTrue( + Arrays.equals( + calculateKey(List.of(fieldName), DELIMITER, kafkaConnectStruct), + fieldValue.toString().getBytes(StandardCharsets.UTF_8))); + } + + @Test + public void testInteger() { + final String fieldName = "Integer"; + final Byte fieldByteValue = (byte) 42; + Schema kafkaConnectSchema = SchemaBuilder.struct().field(fieldName, Schema.INT8_SCHEMA).build(); + + Struct kafkaConnectStruct = new Struct(kafkaConnectSchema); + kafkaConnectStruct.put(fieldName, fieldByteValue); + assertTrue( + Arrays.equals( + calculateKey(List.of(), DELIMITER, fieldByteValue), + fieldByteValue.toString().getBytes(StandardCharsets.UTF_8))); + assertTrue( + Arrays.equals( + calculateKey(List.of(), DELIMITER, kafkaConnectStruct), + fieldByteValue.toString().getBytes(StandardCharsets.UTF_8))); + assertTrue( + Arrays.equals( + calculateKey(List.of(fieldName), DELIMITER, kafkaConnectStruct), + fieldByteValue.toString().getBytes(StandardCharsets.UTF_8))); + + final Short fieldShortValue = (short) 4242; + kafkaConnectSchema = SchemaBuilder.struct().field(fieldName, Schema.INT16_SCHEMA).build(); + + kafkaConnectStruct = new Struct(kafkaConnectSchema); + kafkaConnectStruct.put(fieldName, fieldShortValue); + assertTrue( + Arrays.equals( + calculateKey(List.of(), DELIMITER, fieldShortValue), + fieldShortValue.toString().getBytes(StandardCharsets.UTF_8))); + assertTrue( + Arrays.equals( + calculateKey(List.of(), DELIMITER, kafkaConnectStruct), + fieldShortValue.toString().getBytes(StandardCharsets.UTF_8))); + assertTrue( + Arrays.equals( + calculateKey(List.of(fieldName), DELIMITER, kafkaConnectStruct), + fieldShortValue.toString().getBytes(StandardCharsets.UTF_8))); + + final Integer fieldIntegerValue = 424242; + 
kafkaConnectSchema = SchemaBuilder.struct().field(fieldName, Schema.INT32_SCHEMA).build(); + + kafkaConnectStruct = new Struct(kafkaConnectSchema); + kafkaConnectStruct.put(fieldName, fieldIntegerValue); + assertTrue( + Arrays.equals( + calculateKey(List.of(), DELIMITER, fieldIntegerValue), + fieldIntegerValue.toString().getBytes(StandardCharsets.UTF_8))); + assertTrue( + Arrays.equals( + calculateKey(List.of(), DELIMITER, kafkaConnectStruct), + fieldIntegerValue.toString().getBytes(StandardCharsets.UTF_8))); + assertTrue( + Arrays.equals( + calculateKey(List.of(fieldName), DELIMITER, kafkaConnectStruct), + fieldIntegerValue.toString().getBytes(StandardCharsets.UTF_8))); + + final Long fieldLongValue = 424242424242L; + kafkaConnectSchema = SchemaBuilder.struct().field(fieldName, Schema.INT64_SCHEMA).build(); + + kafkaConnectStruct = new Struct(kafkaConnectSchema); + kafkaConnectStruct.put(fieldName, fieldLongValue); + assertTrue( + Arrays.equals( + calculateKey(List.of(), DELIMITER, fieldLongValue), + fieldLongValue.toString().getBytes(StandardCharsets.UTF_8))); + assertTrue( + Arrays.equals( + calculateKey(List.of(), DELIMITER, kafkaConnectStruct), + fieldLongValue.toString().getBytes(StandardCharsets.UTF_8))); + assertTrue( + Arrays.equals( + calculateKey(List.of(fieldName), DELIMITER, kafkaConnectStruct), + fieldLongValue.toString().getBytes(StandardCharsets.UTF_8))); + } + + @Test + public void testFloat() { + final String fieldName = "Float"; + final Float fieldFloatValue = 4242424242.4242F; + Schema kafkaConnectSchema = + SchemaBuilder.struct().field(fieldName, Schema.FLOAT32_SCHEMA).build(); + + Struct kafkaConnectStruct = new Struct(kafkaConnectSchema); + kafkaConnectStruct.put(fieldName, fieldFloatValue); + assertTrue( + Arrays.equals( + calculateKey(List.of(), DELIMITER, fieldFloatValue), + fieldFloatValue.toString().getBytes(StandardCharsets.UTF_8))); + assertTrue( + Arrays.equals( + calculateKey(List.of(), DELIMITER, kafkaConnectStruct), + fieldFloatValue.toString().getBytes(StandardCharsets.UTF_8))); + assertTrue( + Arrays.equals( + calculateKey(List.of(fieldName), DELIMITER, kafkaConnectStruct), + fieldFloatValue.toString().getBytes(StandardCharsets.UTF_8))); + + final Double fieldDoubleValue = 4242424242.4242; + kafkaConnectSchema = SchemaBuilder.struct().field(fieldName, Schema.FLOAT64_SCHEMA).build(); + + kafkaConnectStruct = new Struct(kafkaConnectSchema); + kafkaConnectStruct.put(fieldName, fieldDoubleValue); + assertTrue( + Arrays.equals( + calculateKey(List.of(), DELIMITER, fieldDoubleValue), + fieldDoubleValue.toString().getBytes(StandardCharsets.UTF_8))); + assertTrue( + Arrays.equals( + calculateKey(List.of(), DELIMITER, kafkaConnectStruct), + fieldDoubleValue.toString().getBytes(StandardCharsets.UTF_8))); + assertTrue( + Arrays.equals( + calculateKey(List.of(fieldName), DELIMITER, kafkaConnectStruct), + fieldDoubleValue.toString().getBytes(StandardCharsets.UTF_8))); + } + + @Test + public void testDoubleSpecial() { + final String fieldName = "Double"; + + List testValues = + Arrays.asList(Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY, Double.NaN); + List expectedValues = + Arrays.asList(Double.MAX_VALUE, Double.MIN_VALUE, Double.MIN_VALUE); + assertEquals(testValues.size(), expectedValues.size()); + + for (int test = 0; test < testValues.size(); ++test) { + Schema kafkaConnectSchema = + SchemaBuilder.struct().field(fieldName, Schema.FLOAT64_SCHEMA).build(); + Double testValue = testValues.get(test); + + Struct kafkaConnectStruct = new 
Struct(kafkaConnectSchema); + kafkaConnectStruct.put(fieldName, testValue); + assertTrue( + Arrays.equals( + calculateKey(List.of(), DELIMITER, testValue), + testValue.toString().getBytes(StandardCharsets.UTF_8))); + assertTrue( + Arrays.equals( + calculateKey(List.of(), DELIMITER, kafkaConnectStruct), + testValue.toString().getBytes(StandardCharsets.UTF_8))); + assertTrue( + Arrays.equals( + calculateKey(List.of(fieldName), DELIMITER, kafkaConnectStruct), + testValue.toString().getBytes(StandardCharsets.UTF_8))); + } + } + + @Test + public void testString() { + final String fieldName = "String"; + final String fieldValue = "42424242424242424242424242424242"; + Schema kafkaConnectSchema = + SchemaBuilder.struct().field(fieldName, Schema.STRING_SCHEMA).build(); + + Struct kafkaConnectStruct = new Struct(kafkaConnectSchema); + kafkaConnectStruct.put(fieldName, fieldValue); + assertTrue( + Arrays.equals( + calculateKey(List.of(), DELIMITER, fieldValue), + fieldValue.toString().getBytes(StandardCharsets.UTF_8))); + assertTrue( + Arrays.equals( + calculateKey(List.of(), DELIMITER, kafkaConnectStruct), + fieldValue.toString().getBytes(StandardCharsets.UTF_8))); + assertTrue( + Arrays.equals( + calculateKey(List.of(fieldName), DELIMITER, kafkaConnectStruct), + fieldValue.toString().getBytes(StandardCharsets.UTF_8))); + } + + @Test + public void testStruct() { + final String middleFieldStructName = "MiddleStruct"; + final String middleFieldArrayName = "MiddleArray"; + final String innerFieldStructName = "InnerStruct"; + final String innerFieldStringName = "InnerString"; + final String innerFieldIntegerName = "InnerInt"; + final String innerStringValue = "forty two"; + final Integer innerIntegerValue = 42; + final List middleArrayValue = Arrays.asList(42.0f, 42.4f, 42.42f, 42.424f, 42.4242f); + + Schema kafkaConnectInnerSchema = + SchemaBuilder.struct() + .field(innerFieldStringName, Schema.STRING_SCHEMA) + .field(innerFieldIntegerName, Schema.INT32_SCHEMA) + .build(); + + Struct kafkaConnectInnerStruct = new Struct(kafkaConnectInnerSchema); + kafkaConnectInnerStruct.put(innerFieldStringName, innerStringValue); + kafkaConnectInnerStruct.put(innerFieldIntegerName, innerIntegerValue); + + assertTrue( + Arrays.equals( + calculateKey(List.of(), DELIMITER, kafkaConnectInnerStruct), + (innerStringValue + DELIMITER + innerIntegerValue).getBytes(StandardCharsets.UTF_8))); + assertTrue( + Arrays.equals( + calculateKey(List.of(innerFieldStringName), DELIMITER, kafkaConnectInnerStruct), + innerStringValue.toString().getBytes(StandardCharsets.UTF_8))); + assertTrue( + Arrays.equals( + calculateKey(List.of(innerFieldIntegerName), DELIMITER, kafkaConnectInnerStruct), + innerIntegerValue.toString().getBytes(StandardCharsets.UTF_8))); + + Schema kafkaConnectMiddleSchema = + SchemaBuilder.struct() + .field(innerFieldStructName, kafkaConnectInnerSchema) + .field(middleFieldArrayName, SchemaBuilder.array(Schema.FLOAT32_SCHEMA).build()) + .build(); + + Struct kafkaConnectMiddleStruct = new Struct(kafkaConnectMiddleSchema); + kafkaConnectMiddleStruct.put(innerFieldStructName, kafkaConnectInnerStruct); + kafkaConnectMiddleStruct.put(middleFieldArrayName, middleArrayValue); + assertTrue( + Arrays.equals( + calculateKey(List.of(), DELIMITER, kafkaConnectMiddleStruct), + (kafkaConnectInnerStruct.toString() + DELIMITER + middleArrayValue.toString()) + .getBytes(StandardCharsets.UTF_8))); + assertTrue( + Arrays.equals( + calculateKey(List.of(innerFieldStructName), DELIMITER, kafkaConnectMiddleStruct), + 
kafkaConnectInnerStruct.toString().getBytes(StandardCharsets.UTF_8))); + assertTrue( + Arrays.equals( + calculateKey(List.of(middleFieldArrayName), DELIMITER, kafkaConnectMiddleStruct), + middleArrayValue.toString().getBytes(StandardCharsets.UTF_8))); + assertTrue( + Arrays.equals( + calculateKey( + List.of(innerFieldStructName + "." + innerFieldStringName), + DELIMITER, + kafkaConnectMiddleStruct), + innerStringValue.toString().getBytes(StandardCharsets.UTF_8))); + assertTrue( + Arrays.equals( + calculateKey( + List.of(innerFieldStructName + "." + innerFieldIntegerName), + DELIMITER, + kafkaConnectMiddleStruct), + innerIntegerValue.toString().getBytes(StandardCharsets.UTF_8))); + + Schema kafkaConnectOuterSchema = + SchemaBuilder.struct() + .field(innerFieldStructName, kafkaConnectInnerSchema) + .field(middleFieldStructName, kafkaConnectMiddleSchema) + .build(); + + Struct kafkaConnectOuterStruct = new Struct(kafkaConnectOuterSchema); + kafkaConnectOuterStruct.put(innerFieldStructName, kafkaConnectInnerStruct); + kafkaConnectOuterStruct.put(middleFieldStructName, kafkaConnectMiddleStruct); + + assertTrue( + Arrays.equals( + calculateKey(List.of(), DELIMITER, kafkaConnectOuterStruct), + (kafkaConnectInnerStruct.toString() + DELIMITER + kafkaConnectMiddleStruct.toString()) + .getBytes(StandardCharsets.UTF_8))); + assertTrue( + Arrays.equals( + calculateKey(List.of(innerFieldStructName), DELIMITER, kafkaConnectOuterStruct), + kafkaConnectInnerStruct.toString().getBytes(StandardCharsets.UTF_8))); + assertTrue( + Arrays.equals( + calculateKey(List.of(middleFieldStructName), DELIMITER, kafkaConnectOuterStruct), + kafkaConnectMiddleStruct.toString().getBytes(StandardCharsets.UTF_8))); + assertTrue( + Arrays.equals( + calculateKey( + List.of(innerFieldStructName + "." + innerFieldStringName), + DELIMITER, + kafkaConnectOuterStruct), + innerStringValue.toString().getBytes(StandardCharsets.UTF_8))); + assertTrue( + Arrays.equals( + calculateKey( + List.of( + middleFieldStructName + + "." + + innerFieldStructName + + "." 
+ + innerFieldIntegerName), + DELIMITER, + kafkaConnectOuterStruct), + innerIntegerValue.toString().getBytes(StandardCharsets.UTF_8))); + } + + @Test + public void testEmptyInnerStruct() { + final String innerFieldStructName = "InnerStruct"; + final String innerFieldStringName = "InnerString"; + final String innerStringValue = "forty two"; + + Schema kafkaConnectInnerSchema = SchemaBuilder.struct().build(); + + Struct kafkaConnectInnerStruct = new Struct(kafkaConnectInnerSchema); + + Schema kafkaConnectOuterSchema = + SchemaBuilder.struct() + .field(innerFieldStructName, kafkaConnectInnerSchema) + .field(innerFieldStringName, Schema.STRING_SCHEMA) + .build(); + + Struct kafkaConnectOuterStruct = new Struct(kafkaConnectOuterSchema); + kafkaConnectOuterStruct.put(innerFieldStructName, kafkaConnectInnerStruct); + kafkaConnectOuterStruct.put(innerFieldStringName, innerStringValue); + assertTrue( + Arrays.equals( + calculateKey(List.of(), DELIMITER, kafkaConnectOuterStruct), + (kafkaConnectInnerStruct + DELIMITER + innerStringValue) + .getBytes(StandardCharsets.UTF_8))); + assertTrue( + Arrays.equals( + calculateKey(List.of(innerFieldStructName), DELIMITER, kafkaConnectOuterStruct), + kafkaConnectInnerStruct.toString().getBytes(StandardCharsets.UTF_8))); + assertTrue( + Arrays.equals( + calculateKey(List.of(innerFieldStructName), DELIMITER, kafkaConnectOuterStruct), + "Struct{}".getBytes(StandardCharsets.UTF_8))); + } + + @Test + public void testNull() { + Schema structOnlyOptionalFieldsSchema = + SchemaBuilder.struct().field("f", SchemaBuilder.bool().optional()).build(); + Struct structNoOptionalFields = new Struct(structOnlyOptionalFieldsSchema); + + assertThrows(DataException.class, () -> calculateKey(List.of(), DELIMITER, null)); + assertThrows( + DataException.class, () -> calculateKey(List.of(), DELIMITER, structNoOptionalFields)); + } + + @Test + public void testUnmappableValues() { + Schema structNoFieldsSchema = SchemaBuilder.struct().build(); + Struct structNoFields = new Struct(structNoFieldsSchema); + + byte[] expected = new byte[0]; + assertArrayEquals(expected, calculateKey(List.of(), DELIMITER, "")); + assertArrayEquals(expected, calculateKey(List.of(), DELIMITER, new byte[0])); + assertArrayEquals(expected, calculateKey(List.of(), DELIMITER, new HashMap<>())); + assertArrayEquals(expected, calculateKey(List.of(), DELIMITER, structNoFields)); + } + + @Test + public void testDifferentStructMappings() { + final String innerFieldStringName = "InnerString"; + final String innerFieldIntegerName = "InnerInt"; + final String innerStringValue = "forty two"; + final Integer innerIntegerValue = 42; + + Schema kafkaConnectInnerSchema = + SchemaBuilder.struct() + .field(innerFieldStringName, Schema.STRING_SCHEMA) + .field(innerFieldIntegerName, Schema.INT32_SCHEMA) + .build(); + Struct kafkaConnectInnerStruct = new Struct(kafkaConnectInnerSchema); + kafkaConnectInnerStruct.put(innerFieldStringName, innerStringValue); + kafkaConnectInnerStruct.put(innerFieldIntegerName, innerIntegerValue); + + // Note that it preserves field order from the Schema. + assertTrue( + Arrays.equals( + calculateKey(List.of(), DELIMITER, kafkaConnectInnerStruct), + (innerStringValue + DELIMITER + innerIntegerValue).getBytes(StandardCharsets.UTF_8))); + // Force another order. 
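+ // The key definition controls both which fields are used and the order in which their
+ // serialized values are joined with the delimiter, independent of the Schema's field order.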
+ assertTrue( + Arrays.equals( + calculateKey( + List.of(innerFieldIntegerName, innerFieldStringName), + DELIMITER, + kafkaConnectInnerStruct), + (innerIntegerValue + DELIMITER + innerStringValue).getBytes(StandardCharsets.UTF_8))); + // Use the same field twice. + assertTrue( + Arrays.equals( + calculateKey( + List.of(innerFieldIntegerName, innerFieldIntegerName), + DELIMITER, + kafkaConnectInnerStruct), + (innerIntegerValue + DELIMITER + innerIntegerValue).getBytes(StandardCharsets.UTF_8))); + // Try accessing nonexistent key. + assertThrows( + DataException.class, + () -> calculateKey(List.of("invalid"), DELIMITER, kafkaConnectInnerStruct)); + } + + @Test + public void testMap() { + final String fieldNameIntegerMap = "IntegerMap"; + final String fieldNameStringMap = "StringMap"; + final Map integerMap = new HashMap<>(); + final Map stringMap = new HashMap<>(); + + for (int n = 2; n <= 10; n++) { + boolean isPrime = true; + for (int d : integerMap.keySet()) { + if (n % d == 0) { + isPrime = false; + break; + } + } + integerMap.put(n, isPrime); + } + for (int n = 2; n <= 10; n++) { + boolean isPrime = true; + for (String s : stringMap.keySet()) { + Integer d = Integer.parseInt(s); + if (n % d == 0) { + isPrime = false; + break; + } + } + stringMap.put(Integer.toString(n), isPrime); + } + Schema kafkaConnectSchema = + SchemaBuilder.struct() + .field( + fieldNameIntegerMap, SchemaBuilder.map(Schema.INT32_SCHEMA, Schema.BOOLEAN_SCHEMA)) + .field( + fieldNameStringMap, SchemaBuilder.map(Schema.STRING_SCHEMA, Schema.BOOLEAN_SCHEMA)) + .build(); + + Struct kafkaConnectStruct = new Struct(kafkaConnectSchema); + kafkaConnectStruct.put(fieldNameIntegerMap, integerMap); + kafkaConnectStruct.put(fieldNameStringMap, stringMap); + assertTrue( + Arrays.equals( + calculateKey(List.of(), DELIMITER, kafkaConnectStruct), + (integerMap.toString() + DELIMITER + stringMap.toString()) + .getBytes(StandardCharsets.UTF_8))); + assertTrue( + Arrays.equals( + calculateKey(List.of(fieldNameIntegerMap), DELIMITER, kafkaConnectStruct), + integerMap.toString().getBytes(StandardCharsets.UTF_8))); + assertTrue( + Arrays.equals( + calculateKey(List.of(fieldNameStringMap), DELIMITER, kafkaConnectStruct), + stringMap.toString().getBytes(StandardCharsets.UTF_8))); + // The key is Integer, not String - we don't support it + assertThrows( + DataException.class, + () -> calculateKey(List.of(fieldNameIntegerMap + ".3"), DELIMITER, kafkaConnectStruct)); + assertTrue( + Arrays.equals( + calculateKey(List.of(fieldNameStringMap + ".3"), DELIMITER, kafkaConnectStruct), + "true".getBytes(StandardCharsets.UTF_8))); + } + + @Test + public void testIntegerArray() { + final String fieldName = "IntegerArray"; + final List fieldValue = Arrays.asList(42, 4242, 424242, 42424242); + + Schema kafkaConnectSchema = + SchemaBuilder.struct() + .field(fieldName, SchemaBuilder.array(Schema.INT32_SCHEMA).build()) + .build(); + + Struct kafkaConnectStruct = new Struct(kafkaConnectSchema); + kafkaConnectStruct.put(fieldName, fieldValue); + assertTrue( + Arrays.equals( + calculateKey(List.of(), DELIMITER, kafkaConnectStruct), + fieldValue.toString().getBytes(StandardCharsets.UTF_8))); + assertTrue( + Arrays.equals( + calculateKey(List.of(fieldName), DELIMITER, kafkaConnectStruct), + fieldValue.toString().getBytes(StandardCharsets.UTF_8))); + } + + @Test + public void testStructArray() { + final String innerFieldStringName = "InnerString"; + final String innerFieldIntegerName = "InnerInt"; + final String innerStringValue = "42"; + final Integer 
innerIntegerValue = 42; + Schema kafkaConnectInnerSchema = + SchemaBuilder.struct() + .field(innerFieldStringName, Schema.STRING_SCHEMA) + .field(innerFieldIntegerName, Schema.INT32_SCHEMA) + .build(); + Struct kafkaConnectInnerStruct = new Struct(kafkaConnectInnerSchema); + kafkaConnectInnerStruct.put(innerFieldStringName, innerStringValue); + kafkaConnectInnerStruct.put(innerFieldIntegerName, innerIntegerValue); + + final String middleFieldArrayName = "MiddleArray"; + Schema kafkaConnectSchema = + SchemaBuilder.struct() + .field(middleFieldArrayName, SchemaBuilder.array(kafkaConnectInnerSchema).build()) + .build(); + + Struct kafkaConnectStruct = new Struct(kafkaConnectSchema); + List innerStructList = List.of(kafkaConnectInnerStruct); + kafkaConnectStruct.put(middleFieldArrayName, innerStructList); + + assertTrue( + Arrays.equals( + calculateKey(List.of(), DELIMITER, kafkaConnectStruct), + innerStructList.toString().getBytes(StandardCharsets.UTF_8))); + assertTrue( + Arrays.equals( + calculateKey(List.of(middleFieldArrayName), DELIMITER, kafkaConnectStruct), + innerStructList.toString().getBytes(StandardCharsets.UTF_8))); + } + + @Test + public void testStringArray() { + final String fieldName = "StringArray"; + final List fieldValue = + Arrays.asList("Forty-two", "forty-two", "Forty two", "forty two"); + + Schema kafkaConnectSchema = + SchemaBuilder.struct() + .field(fieldName, SchemaBuilder.array(Schema.STRING_SCHEMA).build()) + .build(); + + Struct kafkaConnectStruct = new Struct(kafkaConnectSchema); + kafkaConnectStruct.put(fieldName, fieldValue); + + assertTrue( + Arrays.equals( + calculateKey(List.of(), DELIMITER, kafkaConnectStruct), + fieldValue.toString().getBytes(StandardCharsets.UTF_8))); + assertTrue( + Arrays.equals( + calculateKey(List.of(fieldName), DELIMITER, kafkaConnectStruct), + fieldValue.toString().getBytes(StandardCharsets.UTF_8))); + } + + @Test + public void testBytes() { + final String fieldName = "Bytes"; + final byte[] fieldBytes = new byte[] {42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54}; + final ByteBuffer fieldValueKafkaConnect = ByteBuffer.wrap(fieldBytes); + Schema kafkaConnectSchema = + SchemaBuilder.struct().field(fieldName, Schema.BYTES_SCHEMA).build(); + + Struct kafkaConnectStruct = new Struct(kafkaConnectSchema); + kafkaConnectStruct.put(fieldName, fieldValueKafkaConnect); + assertTrue(Arrays.equals(calculateKey(List.of(), DELIMITER, kafkaConnectStruct), fieldBytes)); + assertTrue( + Arrays.equals(calculateKey(List.of(fieldName), DELIMITER, kafkaConnectStruct), fieldBytes)); + } + + @Test + public void testBytesInStruct() throws IOException { + final String innerFieldStructName = "InnerStruct"; + final String innerFieldBytesName = "InnerBytes"; + ByteArrayOutputStream inputBuilder = new ByteArrayOutputStream(); + for (int i = -128; i < 128; i++) { + inputBuilder.write(i); + } + byte[] innerBytesValue = inputBuilder.toByteArray(); + + Schema kafkaConnectInnerSchema = + SchemaBuilder.struct().field(innerFieldBytesName, Schema.BYTES_SCHEMA).build(); + Struct kafkaConnectInnerStruct = new Struct(kafkaConnectInnerSchema); + kafkaConnectInnerStruct.put(innerFieldBytesName, innerBytesValue); + + Schema kafkaConnectOuterSchema = + SchemaBuilder.struct().field(innerFieldStructName, kafkaConnectInnerSchema).build(); + + Struct kafkaConnectOuterStruct = new Struct(kafkaConnectOuterSchema); + kafkaConnectOuterStruct.put(innerFieldStructName, kafkaConnectInnerStruct); + + ByteArrayOutputStream expectedBuilder = new ByteArrayOutputStream(); + 
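+ // Build the expected row key by hand: the literal "Struct{InnerBytes=" prefix, the raw
+ // byte values, and a closing "}".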
expectedBuilder.write(("Struct{" + innerFieldBytesName + "=").getBytes(StandardCharsets.UTF_8)); + expectedBuilder.write(innerBytesValue); + expectedBuilder.write("}".getBytes(StandardCharsets.UTF_8)); + byte[] expectedStructSerialization = expectedBuilder.toByteArray(); + + assertTrue( + Arrays.equals( + calculateKey(List.of(), DELIMITER, kafkaConnectOuterStruct), + expectedStructSerialization)); + assertTrue( + Arrays.equals( + calculateKey(List.of(innerFieldStructName), DELIMITER, kafkaConnectOuterStruct), + expectedStructSerialization)); + assertTrue( + Arrays.equals( + calculateKey( + List.of(innerFieldStructName + "." + innerFieldBytesName), + DELIMITER, + kafkaConnectOuterStruct), + innerBytesValue)); + } + + @Test + public void testKafkaLogicalTypes() { + final String dateFieldName = "KafkaDate"; + final String timestampFieldName = "KafkaTimestamp"; + final String timeFieldName = "KafkaTime"; + final String decimalFieldName = "KafkaDecimal"; + final Long dateLong = 1488406838808L; + final Date date = new Date(dateLong); + final String decimalString = "0.30000000000000004"; + final Integer decimalScale = 0; + final BigDecimal decimal = new BigDecimal(decimalString); + + Schema kafkaConnectSchema = + SchemaBuilder.struct() + .field(dateFieldName, org.apache.kafka.connect.data.Date.SCHEMA) + .field(timestampFieldName, org.apache.kafka.connect.data.Timestamp.SCHEMA) + .field(timeFieldName, org.apache.kafka.connect.data.Timestamp.SCHEMA) + .field(decimalFieldName, org.apache.kafka.connect.data.Decimal.schema(decimalScale)) + .build(); + + Struct kafkaConnectStruct = new Struct(kafkaConnectSchema); + kafkaConnectStruct.put(dateFieldName, date); + kafkaConnectStruct.put(timestampFieldName, date); + kafkaConnectStruct.put(timeFieldName, date); + kafkaConnectStruct.put(decimalFieldName, decimal); + // TODO: test in practice whether the Confluent sink works exactly like this. 
+ assertTrue( + Arrays.equals( + calculateKey(List.of(dateFieldName), DELIMITER, kafkaConnectStruct), + date.toString().getBytes(StandardCharsets.UTF_8))); + assertTrue( + Arrays.equals( + calculateKey(List.of(timestampFieldName), DELIMITER, kafkaConnectStruct), + date.toString().getBytes(StandardCharsets.UTF_8))); + assertTrue( + Arrays.equals( + calculateKey(List.of(timeFieldName), DELIMITER, kafkaConnectStruct), + date.toString().getBytes(StandardCharsets.UTF_8))); + assertTrue( + Arrays.equals( + calculateKey(List.of(decimalFieldName), DELIMITER, kafkaConnectStruct), + decimalString.getBytes(StandardCharsets.UTF_8))); + } + + @Test + public void testNullable() { + final String nullableFieldName = "nullable"; + final String requiredFieldName = "required"; + final Integer nullableFieldValue = null; + final Integer requiredFieldValue = 42; + + Schema kafkaConnectSchema = + SchemaBuilder.struct() + .field(nullableFieldName, SchemaBuilder.int32().optional().build()) + .field(requiredFieldName, SchemaBuilder.int32().required().build()) + .build(); + + Struct kafkaConnectStruct = new Struct(kafkaConnectSchema); + kafkaConnectStruct.put(nullableFieldName, nullableFieldValue); + kafkaConnectStruct.put(requiredFieldName, requiredFieldValue); + assertThrows(DataException.class, () -> calculateKey(List.of(), DELIMITER, kafkaConnectStruct)); + assertThrows( + DataException.class, + () -> calculateKey(List.of(nullableFieldName), DELIMITER, kafkaConnectStruct)); + assertTrue( + Arrays.equals( + calculateKey(List.of(requiredFieldName), DELIMITER, kafkaConnectStruct), + requiredFieldValue.toString().getBytes(StandardCharsets.UTF_8))); + } + + @Test + public void testNullableStruct() { + final String nullableFieldName = "nullableStruct"; + final String innerStructFieldName = "foobar"; + + Schema kafkaConnectSchema = + SchemaBuilder.struct() + .field( + nullableFieldName, + SchemaBuilder.struct() + .field(innerStructFieldName, SchemaBuilder.bool().build()) + .optional() + .build()) + .build(); + + Struct kafkaConnectStruct = new Struct(kafkaConnectSchema); + kafkaConnectStruct.put(nullableFieldName, null); + assertThrows(DataException.class, () -> calculateKey(List.of(), DELIMITER, kafkaConnectStruct)); + assertThrows( + DataException.class, + () -> calculateKey(List.of(nullableFieldName), DELIMITER, kafkaConnectStruct)); + assertThrows( + DataException.class, + () -> + calculateKey( + List.of(nullableFieldName + "." + innerStructFieldName), + DELIMITER, + kafkaConnectStruct)); + } + + @Test + public void testSchemalessRecordSuccesses() { + JsonConverter jsonConverter = JsonConverterFactory.create(false, true); + + String topic = "topic"; + String delimiter = "##"; + + for (Object[] testCase : + List.of( + // Default key definition and all kinds of types. + // I know of no way to pass unserialized bytes or logical types here. I think it's only + // possible using some kind of schema. + new Object[] {List.of(), "2.130", "2.13"}, + new Object[] {List.of(), "7", "7"}, + new Object[] {List.of(), "\"x\"", "x"}, + new Object[] {List.of(), "true", "true"}, + new Object[] {List.of(), "[]", "[]"}, + new Object[] {List.of(), "[1,\"s\",true]", "[1, s, true]"}, + // Default key definition when using on a map (schemaless data is converted into Map not + // Struct!). + new Object[] {List.of(), "{\"a\":1,\"b\":true,\"c\":\"str\"}", "1##true##str"}, + new Object[] { + List.of(), "{\"b\":1,\"a\":3}", "3##1" + }, // Note it doesn't keep key ordering. 
+            new Object[] {
+              List.of(),
+              "{\"b\":[1,2],\"a\":3,\"c\":{\"x\":\"D\",\"y\":2137}}",
+              "3##[1, 2]##{x=D, y=2137}"
+            },
+            // Key extraction and serialization of nested values.
+            new Object[] {List.of("f"), "{\"f\":{}}", "{}"},
+            new Object[] {List.of("f"), "{\"f\":1}", "1"},
+            new Object[] {List.of("f"), "{\"f\":true}", "true"},
+            new Object[] {List.of("f"), "{\"f\":\"s\"}", "s"},
+            new Object[] {List.of("f"), "{\"f\":[]}", "[]"},
+            new Object[] {List.of("f"), "{\"f\":[1,\"a\"]}", "[1, a]"},
+            new Object[] {List.of("f"), "{\"f\":{\"b\":1,\"a\":3}}", "{a=3, b=1}"},
+            new Object[] {List.of("f"), "{\"f\":{\"a\":{\"b\": true}}}", "{a={b=true}}"},
+            new Object[] {
+              List.of("f"), "{\"f\":{\"a\":{\"b\": true,\"c\":2}}}", "{a={b=true, c=2}}"
+            },
+            new Object[] {List.of("f.a"), "{\"f\":{\"b\":1,\"a\":3}}", "3"})) {
+      KeyMapper mapper = new KeyMapper(delimiter, (List<String>) testCase[0]);
+      SchemaAndValue connectData =
+          jsonConverter.toConnectData(
+              topic, ((String) testCase[1]).getBytes(StandardCharsets.UTF_8));
+      byte[] expectedResult = ((String) testCase[2]).getBytes(StandardCharsets.UTF_8);
+      byte[] result = mapper.getKey(connectData.value());
+      assertTrue(Arrays.equals(expectedResult, result));
+    }
+  }
+
+  @Test
+  public void testAccessingSchemalessPrimitiveField() {
+    KeyMapper mapper = new KeyMapper("#", List.of("fieldName"));
+    assertThrows(DataException.class, () -> mapper.getKey("primitiveString"));
+  }
+
+  private static byte[] calculateKey(
+      List<String> mapperDefinition, String mapperDelimiter, Object kafkaKey) {
+    KeyMapper mapper = new KeyMapper(mapperDelimiter, mapperDefinition);
+    return mapper.getKey(kafkaKey);
+  }
+}
diff --git a/google-cloud-bigtable-kafka-connect-sink/src/test/java/com/google/cloud/kafka/connect/bigtable/mapping/MutationDataBuilderTest.java b/google-cloud-bigtable-kafka-connect-sink/src/test/java/com/google/cloud/kafka/connect/bigtable/mapping/MutationDataBuilderTest.java
new file mode 100644
index 000000000..62fc0d001
--- /dev/null
+++ b/google-cloud-bigtable-kafka-connect-sink/src/test/java/com/google/cloud/kafka/connect/bigtable/mapping/MutationDataBuilderTest.java
@@ -0,0 +1,102 @@
+/*
+ * Copyright 2024 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.kafka.connect.bigtable.mapping;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
+
+import com.google.cloud.bigtable.data.v2.models.Mutation;
+import com.google.cloud.bigtable.data.v2.models.Range;
+import com.google.protobuf.ByteString;
+import java.nio.charset.StandardCharsets;
+import java.util.Optional;
+import java.util.Set;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+@RunWith(JUnit4.class)
+public class MutationDataBuilderTest {
+  private static final ByteString ROW_KEY =
+      ByteString.copyFrom("ROW_KEY".getBytes(StandardCharsets.UTF_8));
+  private static final String TARGET_TABLE_NAME = "table";
+  private static final String COLUMN_FAMILY = "family";
+  private static final ByteString COLUMN_QUALIFIER =
+      ByteString.copyFrom("COLUMN".getBytes(StandardCharsets.UTF_8));
+  private static final ByteString VALUE =
+      ByteString.copyFrom("VALUE".getBytes(StandardCharsets.UTF_8));
+  private static final Long TIMESTAMP = 2024L;
+  private static final Range.TimestampRange TIMESTAMP_RANGE =
+      Range.TimestampRange.create(0, TIMESTAMP);
+
+  Mutation mutation;
+  MutationDataBuilder mutationDataBuilder;
+
+  @Before
+  public void setUp() {
+    mutation = mock(Mutation.class);
+    mutationDataBuilder = new MutationDataBuilder(mutation);
+  }
+
+  @Test
+  public void testEmpty() {
+    assertTrue(mutationDataBuilder.maybeBuild(TARGET_TABLE_NAME, ROW_KEY).isEmpty());
+  }
+
+  @Test
+  public void testDeleteRow() {
+    mutationDataBuilder.deleteRow();
+    verify(mutation, times(1)).deleteRow();
+    Optional<MutationData> mutationData =
+        mutationDataBuilder.maybeBuild(TARGET_TABLE_NAME, ROW_KEY);
+    assertTrue(mutationData.isPresent());
+    assertEquals(mutationData.get().getRequiredColumnFamilies(), Set.of());
+  }
+
+  @Test
+  public void testDeleteCells() {
+    mutationDataBuilder.deleteCells(COLUMN_FAMILY, COLUMN_QUALIFIER, TIMESTAMP_RANGE);
+    verify(mutation, times(1)).deleteCells(COLUMN_FAMILY, COLUMN_QUALIFIER, TIMESTAMP_RANGE);
+    Optional<MutationData> mutationData =
+        mutationDataBuilder.maybeBuild(TARGET_TABLE_NAME, ROW_KEY);
+    assertTrue(mutationData.isPresent());
+    assertTrue(mutationData.get().getRequiredColumnFamilies().isEmpty());
+  }
+
+  @Test
+  public void testDeleteFamily() {
+    mutationDataBuilder.deleteFamily(COLUMN_FAMILY);
+    verify(mutation, times(1)).deleteFamily(COLUMN_FAMILY);
+    Optional<MutationData> mutationData =
+        mutationDataBuilder.maybeBuild(TARGET_TABLE_NAME, ROW_KEY);
+    assertTrue(mutationData.isPresent());
+    assertTrue(mutationData.get().getRequiredColumnFamilies().isEmpty());
+  }
+
+  @Test
+  public void testSetCell() {
+    mutationDataBuilder.setCell(COLUMN_FAMILY, COLUMN_QUALIFIER, TIMESTAMP, VALUE);
+    verify(mutation, times(1)).setCell(COLUMN_FAMILY, COLUMN_QUALIFIER, TIMESTAMP, VALUE);
+    Optional<MutationData> mutationData =
+        mutationDataBuilder.maybeBuild(TARGET_TABLE_NAME, ROW_KEY);
+    assertTrue(mutationData.isPresent());
+    assertEquals(mutationData.get().getRequiredColumnFamilies(), Set.of(COLUMN_FAMILY));
+  }
+}
diff --git a/google-cloud-bigtable-kafka-connect-sink/src/test/java/com/google/cloud/kafka/connect/bigtable/mapping/ValueMapperTest.java b/google-cloud-bigtable-kafka-connect-sink/src/test/java/com/google/cloud/kafka/connect/bigtable/mapping/ValueMapperTest.java
new file mode 100644
index 000000000..ce2762fd7
--- /dev/null
+++
b/google-cloud-bigtable-kafka-connect-sink/src/test/java/com/google/cloud/kafka/connect/bigtable/mapping/ValueMapperTest.java @@ -0,0 +1,751 @@ +/* + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.google.cloud.kafka.connect.bigtable.mapping; + +import static com.google.cloud.kafka.connect.bigtable.util.MockUtil.assertTotalNumberOfInvocations; +import static org.junit.Assert.assertTrue; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; + +import com.google.cloud.bigtable.data.v2.models.Range; +import com.google.cloud.kafka.connect.bigtable.config.NullValueMode; +import com.google.cloud.kafka.connect.bigtable.util.JsonConverterFactory; +import com.google.protobuf.ByteString; +import java.math.BigDecimal; +import java.nio.charset.StandardCharsets; +import java.util.Date; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.TreeMap; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.kafka.connect.data.Schema; +import org.apache.kafka.connect.data.SchemaBuilder; +import org.apache.kafka.connect.data.Struct; +import org.apache.kafka.connect.json.JsonConverter; +import org.junit.Ignore; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +@RunWith(JUnit4.class) +public class ValueMapperTest { + private static final String DEFAULT_COLUMN_FAMILY = "COLUMN_FAMILY"; + private static final String DEFAULT_COLUMN = "COLUMN_QUALIFIER"; + private static final ByteString DEFAULT_COLUMN_BYTES = + ByteString.copyFrom(DEFAULT_COLUMN.getBytes(StandardCharsets.UTF_8)); + private static final ByteString ROW_KEY = + ByteString.copyFrom("ROW_KEY".getBytes(StandardCharsets.UTF_8)); + private static final String TARGET_TABLE_NAME = "table"; + private static final Long TIMESTAMP = 2024L; + private static final Range.TimestampRange TIMESTAMP_RANGE = + Range.TimestampRange.create(0, TIMESTAMP); + private static final String DEFAULT_TOPIC = "topic"; + + private static final String NESTED_NULL_STRUCT_FIELD_NAME = "struct"; + private static final ByteString NESTED_NULL_STRUCT_FIELD_NAME_BYTES = + ByteString.copyFrom(NESTED_NULL_STRUCT_FIELD_NAME.getBytes(StandardCharsets.UTF_8)); + + private static final JsonConverter jsonConverter = JsonConverterFactory.create(false, false); + + @Test + public void testBoolean() { + Boolean value = true; + ByteString expected = ByteString.copyFrom(Bytes.toBytes(value)); + ValueMapper mapper = + new TestValueMapper(DEFAULT_COLUMN_FAMILY, DEFAULT_COLUMN, NullValueMode.IGNORE); + MutationDataBuilder mutationDataBuilder = mapper.getRecordMutationDataBuilder(value, TIMESTAMP); + verify(mutationDataBuilder, times(1)) + .setCell(DEFAULT_COLUMN_FAMILY, DEFAULT_COLUMN_BYTES, TIMESTAMP, expected); + assertTotalNumberOfInvocations(mutationDataBuilder, 1); + } + + @Test + public void testString() { + String value = "rrrrrrr"; + ByteString expected = 
ByteString.copyFrom(Bytes.toBytes(value)); + ValueMapper mapper = + new TestValueMapper(DEFAULT_COLUMN_FAMILY, DEFAULT_COLUMN, NullValueMode.IGNORE); + MutationDataBuilder mutationDataBuilder = mapper.getRecordMutationDataBuilder(value, TIMESTAMP); + verify(mutationDataBuilder, times(1)) + .setCell(DEFAULT_COLUMN_FAMILY, DEFAULT_COLUMN_BYTES, TIMESTAMP, expected); + assertTotalNumberOfInvocations(mutationDataBuilder, 1); + } + + @Test + public void testLong() { + Long value = 9223372036854775807L; + ByteString expected = ByteString.copyFrom(Bytes.toBytes(value)); + ValueMapper mapper = + new TestValueMapper(DEFAULT_COLUMN_FAMILY, DEFAULT_COLUMN, NullValueMode.IGNORE); + MutationDataBuilder mutationDataBuilder = mapper.getRecordMutationDataBuilder(value, TIMESTAMP); + verify(mutationDataBuilder, times(1)) + .setCell(DEFAULT_COLUMN_FAMILY, DEFAULT_COLUMN_BYTES, TIMESTAMP, expected); + assertTotalNumberOfInvocations(mutationDataBuilder, 1); + } + + @Test + public void testInteger() { + Integer value = -2147483648; + ByteString expected = ByteString.copyFrom(Bytes.toBytes(value)); + ValueMapper mapper = + new TestValueMapper(DEFAULT_COLUMN_FAMILY, DEFAULT_COLUMN, NullValueMode.IGNORE); + MutationDataBuilder mutationDataBuilder = mapper.getRecordMutationDataBuilder(value, TIMESTAMP); + verify(mutationDataBuilder, times(1)) + .setCell(DEFAULT_COLUMN_FAMILY, DEFAULT_COLUMN_BYTES, TIMESTAMP, expected); + assertTotalNumberOfInvocations(mutationDataBuilder, 1); + } + + @Test + public void testShort() { + Short value = 32767; + ByteString expected = ByteString.copyFrom(Bytes.toBytes(value)); + ValueMapper mapper = + new TestValueMapper(DEFAULT_COLUMN_FAMILY, DEFAULT_COLUMN, NullValueMode.IGNORE); + MutationDataBuilder mutationDataBuilder = mapper.getRecordMutationDataBuilder(value, TIMESTAMP); + verify(mutationDataBuilder, times(1)) + .setCell(DEFAULT_COLUMN_FAMILY, DEFAULT_COLUMN_BYTES, TIMESTAMP, expected); + assertTotalNumberOfInvocations(mutationDataBuilder, 1); + } + + @Test + public void testByte() { + Byte value = -128; + ByteString expected = ByteString.copyFrom(Bytes.toBytes(value)); + ValueMapper mapper = + new TestValueMapper(DEFAULT_COLUMN_FAMILY, DEFAULT_COLUMN, NullValueMode.IGNORE); + MutationDataBuilder mutationDataBuilder = mapper.getRecordMutationDataBuilder(value, TIMESTAMP); + verify(mutationDataBuilder, times(1)) + .setCell(DEFAULT_COLUMN_FAMILY, DEFAULT_COLUMN_BYTES, TIMESTAMP, expected); + assertTotalNumberOfInvocations(mutationDataBuilder, 1); + } + + @Test + public void testBytes() { + byte[] value = new byte[] {(byte) 37, (byte) 21}; + ByteString expected = ByteString.copyFrom(value); + ValueMapper mapper = + new TestValueMapper(DEFAULT_COLUMN_FAMILY, DEFAULT_COLUMN, NullValueMode.IGNORE); + MutationDataBuilder mutationDataBuilder = mapper.getRecordMutationDataBuilder(value, TIMESTAMP); + verify(mutationDataBuilder, times(1)) + .setCell(DEFAULT_COLUMN_FAMILY, DEFAULT_COLUMN_BYTES, TIMESTAMP, expected); + assertTotalNumberOfInvocations(mutationDataBuilder, 1); + } + + @Test + public void testFloat() { + Float value = 128.37157f; + ByteString expected = ByteString.copyFrom(Bytes.toBytes(value)); + ValueMapper mapper = + new TestValueMapper(DEFAULT_COLUMN_FAMILY, DEFAULT_COLUMN, NullValueMode.IGNORE); + MutationDataBuilder mutationDataBuilder = mapper.getRecordMutationDataBuilder(value, TIMESTAMP); + verify(mutationDataBuilder, times(1)) + .setCell(DEFAULT_COLUMN_FAMILY, DEFAULT_COLUMN_BYTES, TIMESTAMP, expected); + assertTotalNumberOfInvocations(mutationDataBuilder, 1); + } + 
+ @Test + public void testDouble() { + Double value = 128.37157; + ByteString expected = ByteString.copyFrom(Bytes.toBytes(value)); + ValueMapper mapper = + new TestValueMapper(DEFAULT_COLUMN_FAMILY, DEFAULT_COLUMN, NullValueMode.IGNORE); + MutationDataBuilder mutationDataBuilder = mapper.getRecordMutationDataBuilder(value, TIMESTAMP); + verify(mutationDataBuilder, times(1)) + .setCell(DEFAULT_COLUMN_FAMILY, DEFAULT_COLUMN_BYTES, TIMESTAMP, expected); + assertTotalNumberOfInvocations(mutationDataBuilder, 1); + } + + @Test + public void testDoubleSpecial() { + Double value = Double.NaN; + ByteString expected = ByteString.copyFrom(Bytes.toBytes(value)); + ValueMapper mapper = + new TestValueMapper(DEFAULT_COLUMN_FAMILY, DEFAULT_COLUMN, NullValueMode.IGNORE); + MutationDataBuilder mutationDataBuilder = mapper.getRecordMutationDataBuilder(value, TIMESTAMP); + verify(mutationDataBuilder, times(1)) + .setCell(DEFAULT_COLUMN_FAMILY, DEFAULT_COLUMN_BYTES, TIMESTAMP, expected); + assertTotalNumberOfInvocations(mutationDataBuilder, 1); + } + + @Ignore // TODO: fix it. + @Test + public void testDate() { + // TODO: is it correct? Or maybe should the implementation first convert it into logical value? + Long value = 1732822801000L; + ByteString expected = ByteString.copyFrom(Bytes.toBytes(value)); + ValueMapper mapper = + new TestValueMapper(DEFAULT_COLUMN_FAMILY, DEFAULT_COLUMN, NullValueMode.IGNORE); + MutationDataBuilder mutationDataBuilder = + mapper.getRecordMutationDataBuilder(new Date(value), TIMESTAMP); + verify(mutationDataBuilder, times(1)) + .setCell(DEFAULT_COLUMN_FAMILY, DEFAULT_COLUMN_BYTES, TIMESTAMP, expected); + assertTotalNumberOfInvocations(mutationDataBuilder, 1); + } + + @Ignore // TODO: fix it. + @Test + public void testDecimal() { + // TODO: is it correct? Or maybe should the implementation first convert it into logical value? 
+ BigDecimal value = new BigDecimal("0.30000000000000000004"); + ByteString expected = ByteString.copyFrom(Bytes.toBytes(value)); + ValueMapper mapper = + new TestValueMapper(DEFAULT_COLUMN_FAMILY, DEFAULT_COLUMN, NullValueMode.IGNORE); + MutationDataBuilder mutationDataBuilder = mapper.getRecordMutationDataBuilder(value, TIMESTAMP); + verify(mutationDataBuilder, times(1)) + .setCell(DEFAULT_COLUMN_FAMILY, DEFAULT_COLUMN_BYTES, TIMESTAMP, expected); + assertTotalNumberOfInvocations(mutationDataBuilder, 1); + } + + @Test + public void testArray() { + List value = List.of("1", 2, true); + ValueMapper mapper = + new TestValueMapper(DEFAULT_COLUMN_FAMILY, DEFAULT_COLUMN, NullValueMode.IGNORE); + MutationDataBuilder mutationDataBuilder = mapper.getRecordMutationDataBuilder(value, TIMESTAMP); + verify(mutationDataBuilder, times(1)) + .setCell( + DEFAULT_COLUMN_FAMILY, + DEFAULT_COLUMN_BYTES, + TIMESTAMP, + ByteString.copyFrom("[\"1\",2,true]".getBytes(StandardCharsets.UTF_8))); + assertTotalNumberOfInvocations(mutationDataBuilder, 1); + } + + @Test + public void testRootValueNeedsBothDefaultColumns() { + Integer value = 123; + for (ValueMapper mapper : + List.of( + new TestValueMapper(null, null, NullValueMode.WRITE), + new TestValueMapper(DEFAULT_COLUMN_FAMILY, null, NullValueMode.WRITE), + new TestValueMapper(null, DEFAULT_COLUMN, NullValueMode.WRITE))) { + MutationDataBuilder mutationDataBuilder = + mapper.getRecordMutationDataBuilder(value, TIMESTAMP); + verify(mutationDataBuilder, times(0)) + .setCell( + DEFAULT_COLUMN_FAMILY, + DEFAULT_COLUMN_BYTES, + TIMESTAMP, + ByteString.copyFrom(Bytes.toBytes(value))); + } + ValueMapper mapper = + new TestValueMapper(DEFAULT_COLUMN_FAMILY, DEFAULT_COLUMN, NullValueMode.WRITE); + MutationDataBuilder mutationDataBuilder = mapper.getRecordMutationDataBuilder(value, TIMESTAMP); + verify(mutationDataBuilder, times(1)) + .setCell( + DEFAULT_COLUMN_FAMILY, + DEFAULT_COLUMN_BYTES, + TIMESTAMP, + ByteString.copyFrom(Bytes.toBytes(value))); + assertTotalNumberOfInvocations(mutationDataBuilder, 1); + } + + @Test + public void testValueNestedOnceNeedsOnlyDefaultColumnFamily() { + Object value = fromJson("{\"key\": 2}"); + ValueMapper mapper = new TestValueMapper(DEFAULT_COLUMN_FAMILY, null, NullValueMode.WRITE); + MutationDataBuilder mutationDataBuilder = mapper.getRecordMutationDataBuilder(value, TIMESTAMP); + verify(mutationDataBuilder, times(1)) + .setCell( + DEFAULT_COLUMN_FAMILY, + ByteString.copyFrom("key".getBytes(StandardCharsets.UTF_8)), + TIMESTAMP, + ByteString.copyFrom(Bytes.toBytes(2L))); + + assertTotalNumberOfInvocations(mutationDataBuilder, 1); + } + + @Test + public void testMultipleOperationsAtOnce() { + Object value = fromJson("{\"a\":{\"b\":789},\"c\":true,\"x\":{\"y\":null},\"z\":null}"); + ValueMapper mapper = + new TestValueMapper(DEFAULT_COLUMN_FAMILY, DEFAULT_COLUMN, NullValueMode.DELETE); + MutationDataBuilder mutationDataBuilder = mapper.getRecordMutationDataBuilder(value, TIMESTAMP); + verify(mutationDataBuilder, times(1)) + .setCell( + "a", + ByteString.copyFrom("b".getBytes(StandardCharsets.UTF_8)), + TIMESTAMP, + ByteString.copyFrom(Bytes.toBytes(789L))); + verify(mutationDataBuilder, times(1)) + .setCell( + DEFAULT_COLUMN_FAMILY, + ByteString.copyFrom("c".getBytes(StandardCharsets.UTF_8)), + TIMESTAMP, + ByteString.copyFrom(Bytes.toBytes(true))); + verify(mutationDataBuilder, times(1)) + .deleteCells( + "x", + ByteString.copyFrom("y".getBytes(StandardCharsets.UTF_8)), + Range.TimestampRange.create(0, TIMESTAMP)); + 
verify(mutationDataBuilder, times(1)).deleteFamily("z"); + assertTotalNumberOfInvocations(mutationDataBuilder, 4); + } + + @Test + public void testMap() { + Object outerMapKey = 123456; + Object innerMapKey = "innerMapKey"; + String familyToBeDeleted = "familyToBeDeleted"; + String columnToBeDeleted = "columnToBeDeleted"; + ByteString columnToBeDeletedBytes = + ByteString.copyFrom(columnToBeDeleted.getBytes(StandardCharsets.UTF_8)); + Object innermostNullKey = "innermostNullKey"; + + Object value = "value"; + ByteString valueBytes = ByteString.copyFrom(((String) value).getBytes(StandardCharsets.UTF_8)); + Object valueKey = "valueKey"; + ByteString valueKeyBytes = + ByteString.copyFrom(((String) valueKey).getBytes(StandardCharsets.UTF_8)); + + Map innermostMap = new HashMap<>(); + Map innerMap = new HashMap<>(); + Map outerMap = new HashMap<>(); + + outerMap.put(outerMapKey, innerMap); + innerMap.put(innerMapKey, innermostMap); + + outerMap.put(valueKey, value); + innerMap.put(valueKey, value); + innermostMap.put(valueKey, value); + + outerMap.put(familyToBeDeleted, null); + innerMap.put(columnToBeDeleted, null); + innermostMap.put(innermostNullKey, null); + + /* + { + outerMapKey: { + innerMapKey: { + valueKey: value, + innermostNullKey: null, + } + valueKey: value, + columnToBeDeleted: null, + } + valueKey: value, + familyToBeDeleted: null, + } + */ + String expectedJsonification = "{\"innermostNullKey\":null,\"valueKey\":\"value\"}"; + ByteString expectedJsonificationBytes = + ByteString.copyFrom(expectedJsonification.getBytes(StandardCharsets.UTF_8)); + + ValueMapper mapper = + new TestValueMapper(DEFAULT_COLUMN_FAMILY, DEFAULT_COLUMN, NullValueMode.DELETE); + MutationDataBuilder mutationDataBuilder = + mapper.getRecordMutationDataBuilder(outerMap, TIMESTAMP); + verify(mutationDataBuilder, times(1)).deleteFamily(familyToBeDeleted); + verify(mutationDataBuilder, times(1)) + .setCell(DEFAULT_COLUMN_FAMILY, valueKeyBytes, TIMESTAMP, valueBytes); + verify(mutationDataBuilder, times(1)) + .deleteCells( + outerMapKey.toString(), + columnToBeDeletedBytes, + Range.TimestampRange.create(0, TIMESTAMP)); + verify(mutationDataBuilder, times(1)) + .setCell(outerMapKey.toString(), valueKeyBytes, TIMESTAMP, valueBytes); + verify(mutationDataBuilder, times(1)) + .setCell( + outerMapKey.toString(), + ByteString.copyFrom(innerMapKey.toString().getBytes(StandardCharsets.UTF_8)), + TIMESTAMP, + expectedJsonificationBytes); + assertTotalNumberOfInvocations(mutationDataBuilder, 5); + } + + @Test + public void testJsonificationOfNonJsonNativeTypes() { + final String dateFieldName = "KafkaDate"; + final String timestampFieldName = "KafkaTimestamp"; + final String timeFieldName = "KafkaTime"; + final String decimalFieldName = "KafkaDecimal"; + final String bytesFieldName = "KafkaBytes"; + final Long dateLong = 1488406838808L; + final Date date = new Date(dateLong); + final String decimalString = "0.30000000000000004"; + final Integer decimalScale = 0; + final BigDecimal decimal = new BigDecimal(decimalString); + final byte[] bytes = "bytes\0".getBytes(StandardCharsets.UTF_8); + final String schemaStructFieldName = "schema"; + final ByteString schemaStructFieldNameBytes = + ByteString.copyFrom(schemaStructFieldName.getBytes(StandardCharsets.UTF_8)); + final String schemalessMapFieldName = "schemaless"; + final ByteString schemalessMapFieldNameBytes = + ByteString.copyFrom(schemalessMapFieldName.getBytes(StandardCharsets.UTF_8)); + + Schema structSchema = + SchemaBuilder.struct() + .field(dateFieldName, 
org.apache.kafka.connect.data.Date.SCHEMA) + .field(timestampFieldName, org.apache.kafka.connect.data.Timestamp.SCHEMA) + .field(timeFieldName, org.apache.kafka.connect.data.Timestamp.SCHEMA) + .field(decimalFieldName, org.apache.kafka.connect.data.Decimal.schema(decimalScale)) + .field(bytesFieldName, Schema.BYTES_SCHEMA) + .build(); + Struct struct = new Struct(structSchema); + Map map = new TreeMap<>(); // Note we need this map to be ordered! + + Map outerMap = new HashMap<>(); + Map innerMap = new HashMap<>(); + + outerMap.put(DEFAULT_COLUMN_FAMILY, innerMap); + innerMap.put(schemaStructFieldName, struct); + innerMap.put(schemalessMapFieldName, map); + struct.put(dateFieldName, date); + map.put(dateFieldName, date); + struct.put(timestampFieldName, date); + map.put(timestampFieldName, date); + struct.put(timeFieldName, date); + map.put(timeFieldName, date); + struct.put(decimalFieldName, decimal); + map.put(decimalFieldName, decimal); + struct.put(bytesFieldName, bytes); + map.put(bytesFieldName, bytes); + + String expectedStringificationWithoutSchema = + "{\"KafkaBytes\":\"Ynl0ZXMA\",\"KafkaDate\":1488406838808,\"KafkaDecimal\":0.30000000000000004,\"KafkaTime\":1488406838808,\"KafkaTimestamp\":1488406838808}"; + ByteString expectedStringificationWithoutSchemaBytes = + ByteString.copyFrom(expectedStringificationWithoutSchema.getBytes(StandardCharsets.UTF_8)); + // TODO: shouldn't it be different than schemaless serialization? (e.g., count 'time' modulo + // 24h) + String expectedStringificationWithSchema = + "{\"KafkaDate\":1488406838808,\"KafkaTimestamp\":1488406838808,\"KafkaTime\":1488406838808,\"KafkaDecimal\":0.30000000000000004,\"KafkaBytes\":\"Ynl0ZXMA\"}"; + ByteString expectedStringificationWithSchemaBytes = + ByteString.copyFrom(expectedStringificationWithSchema.getBytes(StandardCharsets.UTF_8)); + + ValueMapper mapper = new TestValueMapper(null, null, NullValueMode.DELETE); + MutationDataBuilder mutationDataBuilder = + mapper.getRecordMutationDataBuilder(outerMap, TIMESTAMP); + verify(mutationDataBuilder, times(1)) + .setCell( + DEFAULT_COLUMN_FAMILY, + schemalessMapFieldNameBytes, + TIMESTAMP, + expectedStringificationWithoutSchemaBytes); + verify(mutationDataBuilder, times(1)) + .setCell( + DEFAULT_COLUMN_FAMILY, + schemaStructFieldNameBytes, + TIMESTAMP, + expectedStringificationWithSchemaBytes); + assertTotalNumberOfInvocations(mutationDataBuilder, 2); + } + + @Test + public void testStruct() { + final String structFieldName = "struct"; + final ByteString structFieldNameBytes = + ByteString.copyFrom(structFieldName.getBytes(StandardCharsets.UTF_8)); + final String valueFieldName = "value"; + final ByteString valueFieldNameBytes = + ByteString.copyFrom(valueFieldName.getBytes(StandardCharsets.UTF_8)); + final String optionalFieldName = "optional"; + final ByteString optionalFieldNameBytes = + ByteString.copyFrom(optionalFieldName.getBytes(StandardCharsets.UTF_8)); + final byte[] value = "value\0".getBytes(StandardCharsets.UTF_8); + + Schema innermostStructSchema = + SchemaBuilder.struct() + .field(valueFieldName, Schema.BYTES_SCHEMA) + .field(optionalFieldName, Schema.OPTIONAL_INT8_SCHEMA) + .build(); + Schema innerStructSchema = + SchemaBuilder.struct() + .field(structFieldName, innermostStructSchema) + .field(valueFieldName, Schema.BYTES_SCHEMA) + .field(optionalFieldName, Schema.OPTIONAL_INT8_SCHEMA) + .build(); + Schema outerStructSchema = + SchemaBuilder.struct() + .field(structFieldName, innerStructSchema) + .field(valueFieldName, Schema.BYTES_SCHEMA) + 
.field(optionalFieldName, Schema.OPTIONAL_INT8_SCHEMA) + .build(); + + Struct innermostStruct = new Struct(innermostStructSchema); + innermostStruct.put(valueFieldName, value); + + String expectedInnermostStringification = "{\"value\":\"dmFsdWUA\",\"optional\":null}"; + ByteString expectedInnermostStringificationBytes = + ByteString.copyFrom(expectedInnermostStringification.getBytes(StandardCharsets.UTF_8)); + + Struct innerStruct = new Struct(innerStructSchema); + innerStruct.put(structFieldName, innermostStruct); + innerStruct.put(valueFieldName, value); + + Struct struct = new Struct(outerStructSchema); + struct.put(structFieldName, innerStruct); + struct.put(valueFieldName, value); + + /* + { + struct: { + struct: { + optionalFieldName: null, + valueFieldName: value, + } + optionalFieldName: null, + valueFieldName: value, + } + optionalFieldName: null, + valueFieldName: value, + } + */ + ValueMapper mapper = + new TestValueMapper(DEFAULT_COLUMN_FAMILY, DEFAULT_COLUMN, NullValueMode.DELETE); + MutationDataBuilder mutationDataBuilder = + mapper.getRecordMutationDataBuilder(struct, TIMESTAMP); + verify(mutationDataBuilder, times(1)) + .setCell(DEFAULT_COLUMN_FAMILY, valueFieldNameBytes, TIMESTAMP, ByteString.copyFrom(value)); + verify(mutationDataBuilder, times(1)).deleteFamily(optionalFieldName); + verify(mutationDataBuilder, times(1)) + .setCell(structFieldName, valueFieldNameBytes, TIMESTAMP, ByteString.copyFrom(value)); + verify(mutationDataBuilder, times(1)) + .deleteCells( + structFieldName, optionalFieldNameBytes, Range.TimestampRange.create(0, TIMESTAMP)); + verify(mutationDataBuilder, times(1)) + .setCell( + structFieldName, + structFieldNameBytes, + TIMESTAMP, + expectedInnermostStringificationBytes); + assertTotalNumberOfInvocations(mutationDataBuilder, 5); + assertTrue(mutationDataBuilder.maybeBuild(TARGET_TABLE_NAME, ROW_KEY).isPresent()); + } + + @Test + public void testEmpty() { + Schema emptyStructSchema = SchemaBuilder.struct().build(); + Struct emptyStruct = new Struct(emptyStructSchema); + Map emptyMap = new HashMap<>(); + ValueMapper mapper = + new TestValueMapper(DEFAULT_COLUMN_FAMILY, DEFAULT_COLUMN, NullValueMode.WRITE); + for (Object value : List.of(emptyMap, emptyStruct)) { + MutationDataBuilder mutationDataBuilder = + mapper.getRecordMutationDataBuilder(value, TIMESTAMP); + assertTotalNumberOfInvocations(mutationDataBuilder, 0); + assertTrue(mutationDataBuilder.maybeBuild(TARGET_TABLE_NAME, ROW_KEY).isEmpty()); + } + } + + @Test + public void testSimpleCase1() { + Object value = fromJson("{\"foo\": {\"bar\": 1}}"); + ValueMapper mapper = new TestValueMapper(null, null, NullValueMode.IGNORE); + MutationDataBuilder mutationDataBuilder = mapper.getRecordMutationDataBuilder(value, TIMESTAMP); + verify(mutationDataBuilder, times(1)) + .setCell( + "foo", + ByteString.copyFrom("bar".getBytes(StandardCharsets.UTF_8)), + TIMESTAMP, + ByteString.copyFrom(Bytes.toBytes(1L))); + assertTotalNumberOfInvocations(mutationDataBuilder, 1); + assertTrue(mutationDataBuilder.maybeBuild(TARGET_TABLE_NAME, ROW_KEY).isPresent()); + } + + @Test + public void testSimpleCase2() { + Object value = fromJson("{\"foo\": {\"bar\": {\"fizz\": 1}}}"); + ValueMapper mapper = new TestValueMapper(null, null, NullValueMode.IGNORE); + MutationDataBuilder mutationDataBuilder = mapper.getRecordMutationDataBuilder(value, TIMESTAMP); + verify(mutationDataBuilder, times(1)) + .setCell( + "foo", + ByteString.copyFrom("bar".getBytes(StandardCharsets.UTF_8)), + TIMESTAMP, + 
ByteString.copyFrom("{\"fizz\":1}".getBytes(StandardCharsets.UTF_8))); + assertTrue(mutationDataBuilder.maybeBuild(TARGET_TABLE_NAME, ROW_KEY).isPresent()); + } + + @Test + public void testSimpleCase3() { + Object value = fromJson("{\"foo\": 1}"); + ValueMapper mapper = new TestValueMapper(DEFAULT_COLUMN_FAMILY, null, NullValueMode.IGNORE); + MutationDataBuilder mutationDataBuilder = mapper.getRecordMutationDataBuilder(value, TIMESTAMP); + verify(mutationDataBuilder, times(1)) + .setCell( + DEFAULT_COLUMN_FAMILY, + ByteString.copyFrom("foo".getBytes(StandardCharsets.UTF_8)), + TIMESTAMP, + ByteString.copyFrom(Bytes.toBytes(1L))); + assertTotalNumberOfInvocations(mutationDataBuilder, 1); + assertTrue(mutationDataBuilder.maybeBuild(TARGET_TABLE_NAME, ROW_KEY).isPresent()); + } + + @Test + public void testNullModeIgnoreRoot() { + ValueMapper mapper = + new TestValueMapper(DEFAULT_COLUMN_FAMILY, DEFAULT_COLUMN, NullValueMode.IGNORE); + MutationDataBuilder mutationDataBuilder = mapper.getRecordMutationDataBuilder(null, TIMESTAMP); + assertTotalNumberOfInvocations(mutationDataBuilder, 0); + assertTrue(mutationDataBuilder.maybeBuild(TARGET_TABLE_NAME, ROW_KEY).isEmpty()); + } + + @Test + public void testNullModeIgnoreNestedOnce() { + ValueMapper mapper = + new TestValueMapper(DEFAULT_COLUMN_FAMILY, DEFAULT_COLUMN, NullValueMode.IGNORE); + MutationDataBuilder mutationDataBuilder = + mapper.getRecordMutationDataBuilder(getStructhWithNullOnNthNestingLevel(1), TIMESTAMP); + assertTotalNumberOfInvocations(mutationDataBuilder, 0); + assertTrue(mutationDataBuilder.maybeBuild(TARGET_TABLE_NAME, ROW_KEY).isEmpty()); + } + + @Test + public void testNullModeIgnoreNestedTwice() { + ValueMapper mapper = + new TestValueMapper(DEFAULT_COLUMN_FAMILY, DEFAULT_COLUMN, NullValueMode.IGNORE); + MutationDataBuilder mutationDataBuilder = + mapper.getRecordMutationDataBuilder(getStructhWithNullOnNthNestingLevel(2), TIMESTAMP); + assertTotalNumberOfInvocations(mutationDataBuilder, 0); + assertTrue(mutationDataBuilder.maybeBuild(TARGET_TABLE_NAME, ROW_KEY).isEmpty()); + } + + @Test + public void testNullModeWriteRoot() { + ValueMapper mapper = + new TestValueMapper(DEFAULT_COLUMN_FAMILY, DEFAULT_COLUMN, NullValueMode.WRITE); + MutationDataBuilder mutationDataBuilder = mapper.getRecordMutationDataBuilder(null, TIMESTAMP); + verify(mutationDataBuilder, times(1)) + .setCell(DEFAULT_COLUMN_FAMILY, DEFAULT_COLUMN_BYTES, TIMESTAMP, ByteString.empty()); + assertTotalNumberOfInvocations(mutationDataBuilder, 1); + assertTrue(mutationDataBuilder.maybeBuild(TARGET_TABLE_NAME, ROW_KEY).isPresent()); + } + + @Test + public void testNullModeWriteNestedOnce() { + ValueMapper mapper = + new TestValueMapper(DEFAULT_COLUMN_FAMILY, DEFAULT_COLUMN, NullValueMode.WRITE); + MutationDataBuilder mutationDataBuilder = + mapper.getRecordMutationDataBuilder(getStructhWithNullOnNthNestingLevel(1), TIMESTAMP); + verify(mutationDataBuilder, times(1)) + .setCell( + DEFAULT_COLUMN_FAMILY, + NESTED_NULL_STRUCT_FIELD_NAME_BYTES, + TIMESTAMP, + ByteString.empty()); + assertTotalNumberOfInvocations(mutationDataBuilder, 1); + assertTrue(mutationDataBuilder.maybeBuild(TARGET_TABLE_NAME, ROW_KEY).isPresent()); + } + + @Test + public void testNullModeWriteNestedTwice() { + ValueMapper mapper = + new TestValueMapper(DEFAULT_COLUMN_FAMILY, DEFAULT_COLUMN, NullValueMode.WRITE); + MutationDataBuilder mutationDataBuilder = + mapper.getRecordMutationDataBuilder(getStructhWithNullOnNthNestingLevel(2), TIMESTAMP); + verify(mutationDataBuilder, times(1)) + .setCell( + 
NESTED_NULL_STRUCT_FIELD_NAME, + NESTED_NULL_STRUCT_FIELD_NAME_BYTES, + TIMESTAMP, + ByteString.empty()); + assertTotalNumberOfInvocations(mutationDataBuilder, 1); + assertTrue(mutationDataBuilder.maybeBuild(TARGET_TABLE_NAME, ROW_KEY).isPresent()); + } + + @Test + public void testNullModeDeleteRoot() { + ValueMapper mapper = + new TestValueMapper(DEFAULT_COLUMN_FAMILY, DEFAULT_COLUMN, NullValueMode.DELETE); + MutationDataBuilder mutationDataBuilder = mapper.getRecordMutationDataBuilder(null, TIMESTAMP); + verify(mutationDataBuilder, times(1)).deleteRow(); + assertTotalNumberOfInvocations(mutationDataBuilder, 1); + assertTrue(mutationDataBuilder.maybeBuild(TARGET_TABLE_NAME, ROW_KEY).isPresent()); + } + + @Test + public void testNullModeDeleteNestedOnce() { + ValueMapper mapper = + new TestValueMapper(DEFAULT_COLUMN_FAMILY, DEFAULT_COLUMN, NullValueMode.DELETE); + MutationDataBuilder mutationDataBuilder = + mapper.getRecordMutationDataBuilder(getStructhWithNullOnNthNestingLevel(1), TIMESTAMP); + verify(mutationDataBuilder, times(1)).deleteFamily(NESTED_NULL_STRUCT_FIELD_NAME); + assertTotalNumberOfInvocations(mutationDataBuilder, 1); + assertTrue(mutationDataBuilder.maybeBuild(TARGET_TABLE_NAME, ROW_KEY).isPresent()); + } + + @Test + public void testNullModeDeleteNestedTwice() { + ValueMapper mapper = + new TestValueMapper(DEFAULT_COLUMN_FAMILY, DEFAULT_COLUMN, NullValueMode.DELETE); + MutationDataBuilder mutationDataBuilder = + mapper.getRecordMutationDataBuilder(getStructhWithNullOnNthNestingLevel(2), TIMESTAMP); + verify(mutationDataBuilder, times(1)) + .deleteCells( + NESTED_NULL_STRUCT_FIELD_NAME, NESTED_NULL_STRUCT_FIELD_NAME_BYTES, TIMESTAMP_RANGE); + assertTotalNumberOfInvocations(mutationDataBuilder, 1); + assertTrue(mutationDataBuilder.maybeBuild(TARGET_TABLE_NAME, ROW_KEY).isPresent()); + } + + @Test + public void testNullModeNestedThrice() { + ValueMapper mapper = + new TestValueMapper(DEFAULT_COLUMN_FAMILY, DEFAULT_COLUMN, NullValueMode.IGNORE); + String expectedJsonification = "{\"struct\":null}"; + ByteString expectedJsonificationBytes = + ByteString.copyFrom(expectedJsonification.getBytes(StandardCharsets.UTF_8)); + MutationDataBuilder mutationDataBuilder = + mapper.getRecordMutationDataBuilder(getStructhWithNullOnNthNestingLevel(3), TIMESTAMP); + verify(mutationDataBuilder, times(1)) + .setCell( + NESTED_NULL_STRUCT_FIELD_NAME, + NESTED_NULL_STRUCT_FIELD_NAME_BYTES, + TIMESTAMP, + expectedJsonificationBytes); + assertTotalNumberOfInvocations(mutationDataBuilder, 1); + assertTrue(mutationDataBuilder.maybeBuild(TARGET_TABLE_NAME, ROW_KEY).isPresent()); + } + + private static Struct getStructhWithNullOnNthNestingLevel(int n) { + assert n > 0; + + Schema schema = + SchemaBuilder.struct() + .field(NESTED_NULL_STRUCT_FIELD_NAME, SchemaBuilder.struct().optional()) + .build(); + // We consider a Struct with a null child to be a level 1 nested struct. 
+ Struct struct = new Struct(schema); + + while (n > 1) { + n -= 1; + schema = + SchemaBuilder.struct().field(NESTED_NULL_STRUCT_FIELD_NAME, schema).optional().build(); + final Struct outerStruct = new Struct(schema); + outerStruct.put(NESTED_NULL_STRUCT_FIELD_NAME, struct); + struct = outerStruct; + } + return struct; + } + + private static Object fromJson(String s) { + return jsonConverter.toConnectData(DEFAULT_TOPIC, s.getBytes(StandardCharsets.UTF_8)).value(); + } + + private static class TestValueMapper extends ValueMapper { + public TestValueMapper( + String defaultColumnFamily, String defaultColumnQualifier, NullValueMode nullMode) { + super(defaultColumnFamily, defaultColumnQualifier, nullMode); + } + + @Override + protected MutationDataBuilder createMutationDataBuilder() { + return spy(super.createMutationDataBuilder()); + } + } +} diff --git a/google-cloud-bigtable-kafka-connect-sink/src/test/java/com/google/cloud/kafka/connect/bigtable/util/ApiExceptionFactory.java b/google-cloud-bigtable-kafka-connect-sink/src/test/java/com/google/cloud/kafka/connect/bigtable/util/ApiExceptionFactory.java new file mode 100644 index 000000000..f09740dbe --- /dev/null +++ b/google-cloud-bigtable-kafka-connect-sink/src/test/java/com/google/cloud/kafka/connect/bigtable/util/ApiExceptionFactory.java @@ -0,0 +1,35 @@ +/* + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.google.cloud.kafka.connect.bigtable.util; + +import com.google.api.gax.grpc.GrpcStatusCode; +import com.google.api.gax.rpc.ApiException; +import com.google.api.gax.rpc.StatusCode; +import io.grpc.Status; + +public class ApiExceptionFactory { + public static ApiException create() { + return create(Status.Code.NOT_FOUND); + } + + public static ApiException create(Status.Code code) { + return create(new Throwable(), GrpcStatusCode.of(code), true); + } + + public static ApiException create(Throwable cause, StatusCode code, boolean retryable) { + return com.google.api.gax.rpc.ApiExceptionFactory.createException(cause, code, retryable); + } +} diff --git a/google-cloud-bigtable-kafka-connect-sink/src/test/java/com/google/cloud/kafka/connect/bigtable/util/BasicPropertiesFactory.java b/google-cloud-bigtable-kafka-connect-sink/src/test/java/com/google/cloud/kafka/connect/bigtable/util/BasicPropertiesFactory.java new file mode 100644 index 000000000..3cdc7a2a7 --- /dev/null +++ b/google-cloud-bigtable-kafka-connect-sink/src/test/java/com/google/cloud/kafka/connect/bigtable/util/BasicPropertiesFactory.java @@ -0,0 +1,36 @@ +/* + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.kafka.connect.bigtable.util;
+
+import com.google.cloud.kafka.connect.bigtable.config.BigtableSinkConfig;
+import com.google.cloud.kafka.connect.bigtable.config.BigtableSinkTaskConfig;
+import java.util.HashMap;
+import java.util.Map;
+
+public class BasicPropertiesFactory {
+  public static Map<String, String> getSinkProps() {
+    Map<String, String> props = new HashMap<>();
+    props.put(BigtableSinkConfig.CONFIG_GCP_PROJECT_ID, "project");
+    props.put(BigtableSinkConfig.CONFIG_BIGTABLE_INSTANCE_ID, "instance");
+    return props;
+  }
+
+  public static Map<String, String> getTaskProps() {
+    Map<String, String> props = getSinkProps();
+    props.put(BigtableSinkTaskConfig.CONFIG_TASK_ID, "1");
+    return props;
+  }
+}
diff --git a/google-cloud-bigtable-kafka-connect-sink/src/test/java/com/google/cloud/kafka/connect/bigtable/util/FutureUtil.java b/google-cloud-bigtable-kafka-connect-sink/src/test/java/com/google/cloud/kafka/connect/bigtable/util/FutureUtil.java
new file mode 100644
index 000000000..a4d4c7875
--- /dev/null
+++ b/google-cloud-bigtable-kafka-connect-sink/src/test/java/com/google/cloud/kafka/connect/bigtable/util/FutureUtil.java
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2024 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.google.cloud.kafka.connect.bigtable.util;
+
+import com.google.api.core.ApiFuture;
+import com.google.api.core.SettableApiFuture;
+
+public class FutureUtil {
+  public static <T> ApiFuture<T> completedApiFuture(T value) {
+    SettableApiFuture<T> future = SettableApiFuture.create();
+    future.set(value);
+    return future;
+  }
+
+  public static <T> ApiFuture<T> failedApiFuture(Exception exception) {
+    SettableApiFuture<T> future = SettableApiFuture.create();
+    future.setException(exception);
+    return future;
+  }
+}
diff --git a/google-cloud-bigtable-kafka-connect-sink/src/test/java/com/google/cloud/kafka/connect/bigtable/util/JsonConverterFactory.java b/google-cloud-bigtable-kafka-connect-sink/src/test/java/com/google/cloud/kafka/connect/bigtable/util/JsonConverterFactory.java
new file mode 100644
index 000000000..4192fec3c
--- /dev/null
+++ b/google-cloud-bigtable-kafka-connect-sink/src/test/java/com/google/cloud/kafka/connect/bigtable/util/JsonConverterFactory.java
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2024 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.google.cloud.kafka.connect.bigtable.util; + +import java.util.Map; +import org.apache.kafka.connect.json.JsonConverter; + +public class JsonConverterFactory { + public static JsonConverter create(boolean schemasEnable, boolean isKafkaKeyConverter) { + Map jsonConverterProps = + Map.of("schemas.enable", Boolean.toString(schemasEnable)); + JsonConverter jsonConverter = new JsonConverter(); + jsonConverter.configure(jsonConverterProps, isKafkaKeyConverter); + return jsonConverter; + } +} diff --git a/google-cloud-bigtable-kafka-connect-sink/src/test/java/com/google/cloud/kafka/connect/bigtable/util/MockUtil.java b/google-cloud-bigtable-kafka-connect-sink/src/test/java/com/google/cloud/kafka/connect/bigtable/util/MockUtil.java new file mode 100644 index 000000000..03f753400 --- /dev/null +++ b/google-cloud-bigtable-kafka-connect-sink/src/test/java/com/google/cloud/kafka/connect/bigtable/util/MockUtil.java @@ -0,0 +1,29 @@ +/* + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.google.cloud.kafka.connect.bigtable.util; + +import static org.junit.Assert.assertEquals; +import static org.mockito.Mockito.mockingDetails; + +import java.util.Collection; +import org.mockito.invocation.Invocation; + +public class MockUtil { + public static void assertTotalNumberOfInvocations(Object mock, int expected) { + Collection invocations = mockingDetails(mock).getInvocations(); + assertEquals(expected, invocations.size()); + } +} diff --git a/google-cloud-bigtable-kafka-connect-sink/src/test/java/com/google/cloud/kafka/connect/bigtable/util/TestId.java b/google-cloud-bigtable-kafka-connect-sink/src/test/java/com/google/cloud/kafka/connect/bigtable/util/TestId.java new file mode 100644 index 000000000..f32e8a63d --- /dev/null +++ b/google-cloud-bigtable-kafka-connect-sink/src/test/java/com/google/cloud/kafka/connect/bigtable/util/TestId.java @@ -0,0 +1,32 @@ +/* + * Copyright 2025 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.google.cloud.kafka.connect.bigtable.util; + +import com.google.common.collect.Streams; + +public class TestId { + public static String getTestClassId(Class testClass) { + return testClass.getSimpleName(); + } + + public static String getTestCaseId(Class testClass) { + StackWalker.StackFrame frame = + StackWalker.getInstance(StackWalker.Option.RETAIN_CLASS_REFERENCE) + .walk(s -> Streams.findLast(s.filter(f -> f.getDeclaringClass().equals(testClass)))) + .get(); + return getTestClassId(frame.getDeclaringClass()) + frame.getMethodName(); + } +} diff --git a/google-cloud-bigtable-kafka-connect-sink/src/test/resources/fake_service_key.json b/google-cloud-bigtable-kafka-connect-sink/src/test/resources/fake_service_key.json new file mode 100644 index 000000000..f3eac606f --- /dev/null +++ b/google-cloud-bigtable-kafka-connect-sink/src/test/resources/fake_service_key.json @@ -0,0 +1,9 @@ +{ + "type": "service_account", + "private_key_id": "abc", + "private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQDY3E8o1NEFcjMM\nHW/5ZfFJw29/8NEqpViNjQIx95Xx5KDtJ+nWn9+OW0uqsSqKlKGhAdAo+Q6bjx2c\nuXVsXTu7XrZUY5Kltvj94DvUa1wjNXs606r/RxWTJ58bfdC+gLLxBfGnB6CwK0YQ\nxnfpjNbkUfVVzO0MQD7UP0Hl5ZcY0Puvxd/yHuONQn/rIAieTHH1pqgW+zrH/y3c\n59IGThC9PPtugI9ea8RSnVj3PWz1bX2UkCDpy9IRh9LzJLaYYX9RUd7++dULUlat\nAaXBh1U6emUDzhrIsgApjDVtimOPbmQWmX1S60mqQikRpVYZ8u+NDD+LNw+/Eovn\nxCj2Y3z1AgMBAAECggEAWDBzoqO1IvVXjBA2lqId10T6hXmN3j1ifyH+aAqK+FVl\nGjyWjDj0xWQcJ9ync7bQ6fSeTeNGzP0M6kzDU1+w6FgyZqwdmXWI2VmEizRjwk+/\n/uLQUcL7I55Dxn7KUoZs/rZPmQDxmGLoue60Gg6z3yLzVcKiDc7cnhzhdBgDc8vd\nQorNAlqGPRnm3EqKQ6VQp6fyQmCAxrr45kspRXNLddat3AMsuqImDkqGKBmF3Q1y\nxWGe81LphUiRqvqbyUlh6cdSZ8pLBpc9m0c3qWPKs9paqBIvgUPlvOZMqec6x4S6\nChbdkkTRLnbsRr0Yg/nDeEPlkhRBhasXpxpMUBgPywKBgQDs2axNkFjbU94uXvd5\nznUhDVxPFBuxyUHtsJNqW4p/ujLNimGet5E/YthCnQeC2P3Ym7c3fiz68amM6hiA\nOnW7HYPZ+jKFnefpAtjyOOs46AkftEg07T9XjwWNPt8+8l0DYawPoJgbM5iE0L2O\nx8TU1Vs4mXc+ql9F90GzI0x3VwKBgQDqZOOqWw3hTnNT07Ixqnmd3dugV9S7eW6o\nU9OoUgJB4rYTpG+yFqNqbRT8bkx37iKBMEReppqonOqGm4wtuRR6LSLlgcIU9Iwx\nyfH12UWqVmFSHsgZFqM/cK3wGev38h1WBIOx3/djKn7BdlKVh8kWyx6uC8bmV+E6\nOoK0vJD6kwKBgHAySOnROBZlqzkiKW8c+uU2VATtzJSydrWm0J4wUPJifNBa/hVW\ndcqmAzXC9xznt5AVa3wxHBOfyKaE+ig8CSsjNyNZ3vbmr0X04FoV1m91k2TeXNod\njMTobkPThaNm4eLJMN2SQJuaHGTGERWC0l3T18t+/zrDMDCPiSLX1NAvAoGBAN1T\nVLJYdjvIMxf1bm59VYcepbK7HLHFkRq6xMJMZbtG0ryraZjUzYvB4q4VjHk2UDiC\nlhx13tXWDZH7MJtABzjyg+AI7XWSEQs2cBXACos0M4Myc6lU+eL+iA+OuoUOhmrh\nqmT8YYGu76/IBWUSqWuvcpHPpwl7871i4Ga/I3qnAoGBANNkKAcMoeAbJQK7a/Rn\nwPEJB+dPgNDIaboAsh1nZhVhN5cvdvCWuEYgOGCPQLYQF0zmTLcM+sVxOYgfy8mV\nfbNgPgsP5xmu6dw2COBKdtozw0HrWSRjACd1N4yGu75+wPCcX/gQarcjRcXXZeEa\nNtBLSfcqPULqD+h7br9lEJio\n-----END PRIVATE KEY-----\n", + "client_email": "123-abc@developer.gserviceaccount.com", + "client_id": "123-abc.apps.googleusercontent.com", + "auth_uri": "https://accounts.google.com/o/oauth2/auth", + "token_uri": "http://localhost:8080/token" +} \ No newline at end of file
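Note on fake_service_key.json above: it only has to parse as a syntactically valid service-account key; its token_uri points at localhost, so it can never authenticate against a real endpoint. Below is a minimal sketch of how a test might load this resource with google-auth-library. Only the resource name comes from this diff; the example class and method names are hypothetical and not part of the sink.

    import com.google.auth.oauth2.ServiceAccountCredentials;
    import java.io.IOException;
    import java.io.InputStream;

    class FakeServiceKeyExample {
      // Hypothetical helper: reads src/test/resources/fake_service_key.json from the
      // test classpath and parses it into credentials. Parsing happens locally, so the
      // fake private key and localhost token_uri are sufficient for configuration tests.
      static ServiceAccountCredentials loadFakeKey() throws IOException {
        try (InputStream stream =
            FakeServiceKeyExample.class.getResourceAsStream("/fake_service_key.json")) {
          return ServiceAccountCredentials.fromStream(stream);
        }
      }
    }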