From b0e239e1c2830ccb86edd29a3880cadcd5f376cb Mon Sep 17 00:00:00 2001 From: Corey Winkelmann Date: Fri, 8 Nov 2024 14:57:49 -0700 Subject: [PATCH 1/2] feat(datetime): enhance datetime parsing and validation Add validation and parsing for DateColumnDef and DateTimeColumnDef to handle invalid date formats and zero dates as null. Introduce a DateValidator with regex matching for date formats. Extend tests to cover invalid dates and malformed datetime strings. --- .../bootstrap/SynchronousBootstrapper.java | 3 ++ .../schema/columndef/DateColumnDef.java | 29 ++++++++++++-- .../schema/columndef/DateFormatter.java | 4 ++ .../schema/columndef/DateTimeColumnDef.java | 27 ++++++++++++- .../schema/columndef/DateValidator.java | 11 +++++ .../schema/columndef/ColumnDefTest.java | 40 +++++++++++++++++++ 6 files changed, 109 insertions(+), 5 deletions(-) create mode 100644 src/main/java/com/zendesk/maxwell/schema/columndef/DateValidator.java diff --git a/src/main/java/com/zendesk/maxwell/bootstrap/SynchronousBootstrapper.java b/src/main/java/com/zendesk/maxwell/bootstrap/SynchronousBootstrapper.java index 523a3ac0c..8a49d42c5 100644 --- a/src/main/java/com/zendesk/maxwell/bootstrap/SynchronousBootstrapper.java +++ b/src/main/java/com/zendesk/maxwell/bootstrap/SynchronousBootstrapper.java @@ -14,6 +14,7 @@ import com.zendesk.maxwell.schema.columndef.ColumnDef; import com.zendesk.maxwell.schema.columndef.ColumnDefCastException; import com.zendesk.maxwell.schema.columndef.DateColumnDef; +import com.zendesk.maxwell.schema.columndef.DateTimeColumnDef; import com.zendesk.maxwell.schema.columndef.TimeColumnDef; import com.zendesk.maxwell.scripting.Scripting; import org.slf4j.Logger; @@ -267,6 +268,8 @@ private void setRowValues(RowMap row, ResultSet resultSet, Table table) throws S columnValue = getTimestamp(resultSet, columnIndex); else if ( columnDefinition instanceof DateColumnDef) columnValue = resultSet.getString(columnIndex); + else if ( columnDefinition instanceof DateTimeColumnDef) + columnValue = resultSet.getString(columnIndex); else columnValue = resultSet.getObject(columnIndex); diff --git a/src/main/java/com/zendesk/maxwell/schema/columndef/DateColumnDef.java b/src/main/java/com/zendesk/maxwell/schema/columndef/DateColumnDef.java index 61160d4de..af65759bf 100644 --- a/src/main/java/com/zendesk/maxwell/schema/columndef/DateColumnDef.java +++ b/src/main/java/com/zendesk/maxwell/schema/columndef/DateColumnDef.java @@ -1,8 +1,14 @@ package com.zendesk.maxwell.schema.columndef; +import java.time.LocalDate; +import java.time.format.DateTimeFormatter; +import java.time.format.DateTimeParseException; + import com.zendesk.maxwell.producer.MaxwellOutputConfig; public class DateColumnDef extends ColumnDef { + private static final DateTimeFormatter DATE_FORMATTER = DateTimeFormatter.ofPattern("yyyy-MM-dd"); + private DateColumnDef(String name, String type, short pos) { super(name, type, pos); } @@ -24,11 +30,18 @@ public String toSQL(Object value) { @Override public Object asJSON(Object value, MaxwellOutputConfig config) throws ColumnDefCastException { if ( value instanceof String ) { - // bootstrapper just gives up on bothering with date processing - if ( config.zeroDatesAsNull && "0000-00-00".equals((String) value) ) + String dateString = (String) value; + + if ( config.zeroDatesAsNull && "0000-00-00".equals(dateString) ) return null; - else - return value; + + if ( !DateValidator.isValidDateTime(dateString) ) + return null; + + value = parseDate(dateString); + if (value == null) { + return null; + } } else if ( value instanceof Long && (Long) value == Long.MIN_VALUE ) { if ( config.zeroDatesAsNull ) return null; @@ -42,4 +55,12 @@ public Object asJSON(Object value, MaxwellOutputConfig config) throws ColumnDefC throw new ColumnDefCastException(this, value); } } + + private Object parseDate(String dateString) { + try { + return LocalDate.parse(dateString, DATE_FORMATTER); + } catch (DateTimeParseException e) { + return null; + } + } } diff --git a/src/main/java/com/zendesk/maxwell/schema/columndef/DateFormatter.java b/src/main/java/com/zendesk/maxwell/schema/columndef/DateFormatter.java index d58193666..3324ebdbd 100644 --- a/src/main/java/com/zendesk/maxwell/schema/columndef/DateFormatter.java +++ b/src/main/java/com/zendesk/maxwell/schema/columndef/DateFormatter.java @@ -1,6 +1,7 @@ package com.zendesk.maxwell.schema.columndef; import java.sql.Timestamp; +import java.time.LocalDate; import java.time.LocalDateTime; import java.util.*; @@ -23,6 +24,9 @@ public static Timestamp extractTimestamp(Object value) throws IllegalArgumentExc } else if ( value instanceof Date ) { Long time = ((Date) value).getTime(); return new Timestamp(time); + } else if ( value instanceof LocalDate ) { + LocalDateTime startOfDay = ((LocalDate) value).atStartOfDay(); + return Timestamp.valueOf(startOfDay); } else if ( value instanceof LocalDateTime) { return Timestamp.valueOf((LocalDateTime) value); } else diff --git a/src/main/java/com/zendesk/maxwell/schema/columndef/DateTimeColumnDef.java b/src/main/java/com/zendesk/maxwell/schema/columndef/DateTimeColumnDef.java index 9b48c99e0..74f5ef482 100644 --- a/src/main/java/com/zendesk/maxwell/schema/columndef/DateTimeColumnDef.java +++ b/src/main/java/com/zendesk/maxwell/schema/columndef/DateTimeColumnDef.java @@ -3,8 +3,12 @@ import com.zendesk.maxwell.producer.MaxwellOutputConfig; import java.sql.Timestamp; +import java.time.LocalDateTime; +import java.time.format.DateTimeFormatter; +import java.time.format.DateTimeParseException; public class DateTimeColumnDef extends ColumnDefWithLength { + private static final DateTimeFormatter DATE_TIME_FORMATTER = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"); private final boolean isTimestamp = getType().equals("timestamp"); @@ -19,7 +23,20 @@ public static DateTimeColumnDef create(String name, String type, short pos, Long protected String formatValue(Object value, MaxwellOutputConfig config) throws ColumnDefCastException { // special case for those broken mysql dates. - if ( value instanceof Long ) { + if ( value instanceof String) { + String dateString = (String) value; + + if ( config.zeroDatesAsNull && "0000-00-00 00:00:00".equals(dateString) ) + return null; + + if ( !DateValidator.isValidDateTime(dateString) ) + return null; + + value = parseDateTime(dateString); + if (value == null) { + return null; + } + } else if ( value instanceof Long ) { Long v = (Long) value; if ( v == Long.MIN_VALUE || (v == 0L && isTimestamp) ) { if ( config.zeroDatesAsNull ) @@ -37,4 +54,12 @@ protected String formatValue(Object value, MaxwellOutputConfig config) throws Co throw new ColumnDefCastException(this, value); } } + + private Object parseDateTime(String dateString) { + try { + return LocalDateTime.parse(dateString, DATE_TIME_FORMATTER); + } catch (DateTimeParseException e) { + return null; + } + } } diff --git a/src/main/java/com/zendesk/maxwell/schema/columndef/DateValidator.java b/src/main/java/com/zendesk/maxwell/schema/columndef/DateValidator.java new file mode 100644 index 000000000..a46ab2632 --- /dev/null +++ b/src/main/java/com/zendesk/maxwell/schema/columndef/DateValidator.java @@ -0,0 +1,11 @@ +package com.zendesk.maxwell.schema.columndef; + +public class DateValidator { + private static final String DATE_TIME_REGEX = + "^\\d{4}-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])" + + "( (0[0-9]|1[0-9]|2[0-3]):([0-5][0-9]):([0-5][0-9]))?$"; + + public static boolean isValidDateTime(String dateString) { + return dateString.matches(DATE_TIME_REGEX); + } +} diff --git a/src/test/java/com/zendesk/maxwell/schema/columndef/ColumnDefTest.java b/src/test/java/com/zendesk/maxwell/schema/columndef/ColumnDefTest.java index 96cc9b420..fd54ae45f 100644 --- a/src/test/java/com/zendesk/maxwell/schema/columndef/ColumnDefTest.java +++ b/src/test/java/com/zendesk/maxwell/schema/columndef/ColumnDefTest.java @@ -290,6 +290,46 @@ public void TestDateTimeZeroDates() throws ColumnDefCastException { assertEquals(null, d.asJSON(Long.MIN_VALUE, config)); } + @Test + public void TestDateBadMonth() throws ColumnDefCastException { + ColumnDef d = build("date", true); + + MaxwellOutputConfig config = new MaxwellOutputConfig(); + config.zeroDatesAsNull = true; + + assertEquals(null, d.asJSON("2020-00-01", config)); + } + + @Test + public void TestDateBadDay() throws ColumnDefCastException { + ColumnDef d = build("date", true); + + MaxwellOutputConfig config = new MaxwellOutputConfig(); + config.zeroDatesAsNull = true; + + assertEquals(null, d.asJSON("2020-01-00", config)); + } + + @Test + public void TestDatetimeBadDay() throws ColumnDefCastException { + ColumnDef d = build("datetime", true); + + MaxwellOutputConfig config = new MaxwellOutputConfig(); + config.zeroDatesAsNull = true; + + assertEquals(null, d.asJSON("2020-01-00 00:00:00", config)); + } + + @Test + public void TestDatetimeBadMonth() throws ColumnDefCastException { + ColumnDef d = build("datetime", true); + + MaxwellOutputConfig config = new MaxwellOutputConfig(); + config.zeroDatesAsNull = true; + + assertEquals(null, d.asJSON("2020-00-01 00:00:00", config)); + } + @Test public void TestDateTimeWithTimestamp() throws ParseException, ColumnDefCastException { ColumnDef d = build("datetime", true); From 11db988dcec3514b5ac0236ef45b19e2a7855868 Mon Sep 17 00:00:00 2001 From: Corey Winkelmann Date: Fri, 15 Nov 2024 16:39:21 -0700 Subject: [PATCH 2/2] fix(dates): handle non-nullable zero dates in DateColumnDef and DateTimeColumnDef --- .../schema/columndef/DateColumnDef.java | 20 +++++++++++-------- .../schema/columndef/DateTimeColumnDef.java | 20 +++++++++++-------- 2 files changed, 24 insertions(+), 16 deletions(-) diff --git a/src/main/java/com/zendesk/maxwell/schema/columndef/DateColumnDef.java b/src/main/java/com/zendesk/maxwell/schema/columndef/DateColumnDef.java index af65759bf..ac696fb39 100644 --- a/src/main/java/com/zendesk/maxwell/schema/columndef/DateColumnDef.java +++ b/src/main/java/com/zendesk/maxwell/schema/columndef/DateColumnDef.java @@ -32,15 +32,19 @@ public Object asJSON(Object value, MaxwellOutputConfig config) throws ColumnDefC if ( value instanceof String ) { String dateString = (String) value; - if ( config.zeroDatesAsNull && "0000-00-00".equals(dateString) ) - return null; - - if ( !DateValidator.isValidDateTime(dateString) ) - return null; + if ("0000-00-00".equals(dateString)) { + if ( config.zeroDatesAsNull ) + return null; + else + return "0000-00-00"; + } else { + if ( !DateValidator.isValidDateTime(dateString) ) + return null; - value = parseDate(dateString); - if (value == null) { - return null; + value = parseDate(dateString); + if (value == null) { + return null; + } } } else if ( value instanceof Long && (Long) value == Long.MIN_VALUE ) { if ( config.zeroDatesAsNull ) diff --git a/src/main/java/com/zendesk/maxwell/schema/columndef/DateTimeColumnDef.java b/src/main/java/com/zendesk/maxwell/schema/columndef/DateTimeColumnDef.java index 74f5ef482..197d0a65c 100644 --- a/src/main/java/com/zendesk/maxwell/schema/columndef/DateTimeColumnDef.java +++ b/src/main/java/com/zendesk/maxwell/schema/columndef/DateTimeColumnDef.java @@ -26,15 +26,19 @@ protected String formatValue(Object value, MaxwellOutputConfig config) throws Co if ( value instanceof String) { String dateString = (String) value; - if ( config.zeroDatesAsNull && "0000-00-00 00:00:00".equals(dateString) ) - return null; - - if ( !DateValidator.isValidDateTime(dateString) ) - return null; + if ( "0000-00-00 00:00:00".equals(dateString) ) { + if ( config.zeroDatesAsNull ) + return null; + else + return appendFractionalSeconds("0000-00-00 00:00:00", 0, getColumnLength()); + } else { + if ( !DateValidator.isValidDateTime(dateString) ) + return null; - value = parseDateTime(dateString); - if (value == null) { - return null; + value = parseDateTime(dateString); + if (value == null) { + return null; + } } } else if ( value instanceof Long ) { Long v = (Long) value;