-
Notifications
You must be signed in to change notification settings - Fork 3.1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add support for legacy Date in Hive for Parquet
- Loading branch information
Showing
13 changed files
with
1,081 additions
and
4 deletions.
There are no files selected for viewing
139 changes: 139 additions & 0 deletions
139
lib/trino-plugin-toolkit/src/main/java/io/trino/plugin/base/util/CalendarUtils.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,139 @@ | ||
/* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package io.trino.plugin.base.util; | ||
|
||
import com.google.common.collect.Range; | ||
import com.google.common.collect.RangeMap; | ||
import com.google.common.collect.TreeRangeMap; | ||
|
||
import java.text.ParseException; | ||
import java.text.SimpleDateFormat; | ||
import java.time.LocalDate; | ||
import java.util.Date; | ||
import java.util.GregorianCalendar; | ||
import java.util.Map; | ||
import java.util.Optional; | ||
import java.util.TimeZone; | ||
|
||
import static java.time.ZoneOffset.UTC; | ||
import static java.util.concurrent.TimeUnit.DAYS; | ||
import static java.util.concurrent.TimeUnit.MILLISECONDS; | ||
|
||
public final class CalendarUtils | ||
{ | ||
static final LocalDate GREGORIAN_START_DATE = LocalDate.of(1582, 10, 15); | ||
static final LocalDate JULIAN_END_DATE = LocalDate.of(1582, 10, 4); | ||
|
||
private static final TimeZone TZ_UTC = TimeZone.getTimeZone(UTC); | ||
private static final String DATE_FORMAT = "yyyy-MM-dd"; | ||
private static final String DATE_TIME_FORMAT = "yyyy-MM-dd HH:mm:ss.SSS"; | ||
|
||
static final ThreadLocal<SimpleDateFormat> HYBRID_CALENDAR_DATE_FORMAT = ThreadLocal.withInitial(() -> { | ||
SimpleDateFormat format = new SimpleDateFormat(DATE_FORMAT); | ||
format.setCalendar(new GregorianCalendar(TZ_UTC)); | ||
return format; | ||
}); | ||
|
||
static final ThreadLocal<SimpleDateFormat> HYBRID_CALENDAR_DATE_TIME_FORMAT = ThreadLocal.withInitial(() -> { | ||
SimpleDateFormat format = new SimpleDateFormat(DATE_TIME_FORMAT); | ||
format.setCalendar(new GregorianCalendar(TZ_UTC)); | ||
return format; | ||
}); | ||
|
||
static final ThreadLocal<SimpleDateFormat> PROLEPTIC_CALENDAR_DATE_FORMAT = ThreadLocal.withInitial(() -> { | ||
SimpleDateFormat format = new SimpleDateFormat(DATE_FORMAT); | ||
GregorianCalendar prolepticGregorianCalendar = new GregorianCalendar(TZ_UTC); | ||
prolepticGregorianCalendar.setGregorianChange(new Date(Long.MIN_VALUE)); | ||
format.setCalendar(prolepticGregorianCalendar); | ||
return format; | ||
}); | ||
|
||
static final ThreadLocal<SimpleDateFormat> PROLEPTIC_CALENDAR_DATE_TIME_FORMAT = ThreadLocal.withInitial(() -> { | ||
SimpleDateFormat format = new SimpleDateFormat(DATE_TIME_FORMAT); | ||
GregorianCalendar prolepticGregorianCalendar = new GregorianCalendar(TZ_UTC); | ||
prolepticGregorianCalendar.setGregorianChange(new Date(Long.MIN_VALUE)); | ||
format.setCalendar(prolepticGregorianCalendar); | ||
return format; | ||
}); | ||
|
||
// https://en.wikipedia.org/wiki/Proleptic_Gregorian_calendar#Difference_between_Julian_and_proleptic_Gregorian_calendar_dates | ||
private static final RangeMap<Integer, Integer> julianGregorianDiffs = TreeRangeMap.create(); | ||
|
||
private static final int JULIAN_COMMON_ERA_START_DAY; | ||
private static final long LAST_SWITCH_JULIAN_DAY; | ||
|
||
static { | ||
julianGregorianDiffs.put(Range.lessThan(-682945), 2); | ||
julianGregorianDiffs.put(Range.closedOpen(-682945, -646420), 1); | ||
julianGregorianDiffs.put(Range.closedOpen(-646420, -609895), -0); | ||
julianGregorianDiffs.put(Range.closedOpen(-609895, -536845), -1); | ||
julianGregorianDiffs.put(Range.closedOpen(-536845, -500320), -2); | ||
julianGregorianDiffs.put(Range.closedOpen(-500320, -463795), -3); | ||
julianGregorianDiffs.put(Range.closedOpen(-463795, -390745), -4); | ||
julianGregorianDiffs.put(Range.closedOpen(-390745, -354220), -5); | ||
julianGregorianDiffs.put(Range.closedOpen(-354220, -317695), -6); | ||
julianGregorianDiffs.put(Range.closedOpen(-317695, -244645), -7); | ||
julianGregorianDiffs.put(Range.closedOpen(-244645, -208120), -8); | ||
julianGregorianDiffs.put(Range.closedOpen(-208120, -171595), -9); | ||
julianGregorianDiffs.put(Range.closedOpen(-171595, -141427), -10); | ||
julianGregorianDiffs.put(Range.atLeast(-141427), 0); | ||
|
||
try { | ||
JULIAN_COMMON_ERA_START_DAY = (int) MILLISECONDS.toDays(HYBRID_CALENDAR_DATE_FORMAT.get().parse("0001-01-01").getTime()); | ||
LAST_SWITCH_JULIAN_DAY = MILLISECONDS.toDays(HYBRID_CALENDAR_DATE_FORMAT.get().parse("1582-10-15").getTime()); | ||
} | ||
catch (ParseException e) { | ||
throw new RuntimeException(e); | ||
} | ||
} | ||
|
||
private CalendarUtils() {} | ||
|
||
public static int convertDaysToProlepticGregorian(int julianDays) | ||
{ | ||
if (julianDays < JULIAN_COMMON_ERA_START_DAY) { | ||
return convertDaysToProlepticDaysInternal(julianDays); | ||
} | ||
else if (julianDays < LAST_SWITCH_JULIAN_DAY) { | ||
return Optional.ofNullable(julianGregorianDiffs.getEntry(julianDays)).map(Map.Entry::getValue).orElse(0) + julianDays; | ||
} | ||
return julianDays; | ||
} | ||
|
||
private static int convertDaysToProlepticDaysInternal(int hybridDays) | ||
{ | ||
long hybridMillis = DAYS.toMillis(hybridDays); | ||
String hybridDateInString = HYBRID_CALENDAR_DATE_FORMAT.get().format(new Date(hybridMillis)); | ||
long result; | ||
try { | ||
result = PROLEPTIC_CALENDAR_DATE_FORMAT.get().parse(hybridDateInString).getTime(); | ||
} | ||
catch (ParseException e) { | ||
throw new RuntimeException(e); | ||
} | ||
long prolepticMillis = result; | ||
return (int) MILLISECONDS.toDays(prolepticMillis); | ||
} | ||
|
||
public static long convertTimestampToProlepticGregorian(long epochMillis) | ||
{ | ||
String dateTimeInString = HYBRID_CALENDAR_DATE_TIME_FORMAT.get().format(new Date(epochMillis)); | ||
try { | ||
return PROLEPTIC_CALENDAR_DATE_TIME_FORMAT.get().parse(dateTimeInString).getTime(); | ||
} | ||
catch (ParseException e) { | ||
throw new RuntimeException(e); | ||
} | ||
} | ||
} |
240 changes: 240 additions & 0 deletions
240
lib/trino-plugin-toolkit/src/test/java/io/trino/plugin/base/util/TestCalendarUtils.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,240 @@ | ||
/* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package io.trino.plugin.base.util; | ||
|
||
import com.google.common.collect.ImmutableList; | ||
import com.google.common.collect.ImmutableMap; | ||
import org.junit.jupiter.api.Test; | ||
|
||
import java.text.ParseException; | ||
import java.time.Instant; | ||
import java.time.LocalDate; | ||
import java.time.LocalDateTime; | ||
import java.util.Date; | ||
import java.util.Map; | ||
|
||
import static io.trino.plugin.base.util.CalendarUtils.HYBRID_CALENDAR_DATE_TIME_FORMAT; | ||
import static io.trino.plugin.base.util.CalendarUtils.PROLEPTIC_CALENDAR_DATE_TIME_FORMAT; | ||
import static io.trino.plugin.base.util.CalendarUtils.convertDaysToProlepticGregorian; | ||
import static io.trino.plugin.base.util.CalendarUtils.convertTimestampToProlepticGregorian; | ||
import static java.time.ZoneOffset.UTC; | ||
import static java.util.concurrent.TimeUnit.DAYS; | ||
import static java.util.concurrent.TimeUnit.MILLISECONDS; | ||
import static org.assertj.core.api.Assertions.assertThat; | ||
|
||
class TestCalendarUtils | ||
{ | ||
public static long convertTimestampToHybrid(long epochMillis) | ||
{ | ||
LocalDateTime localDateTime = LocalDateTime.ofInstant(Instant.ofEpochMilli(epochMillis), UTC); | ||
LocalDate localDate = localDateTime.toLocalDate(); | ||
if (localDate.isAfter(CalendarUtils.JULIAN_END_DATE) && localDate.isBefore(CalendarUtils.GREGORIAN_START_DATE)) { | ||
localDateTime = LocalDateTime.of(CalendarUtils.GREGORIAN_START_DATE, localDateTime.toLocalTime()); | ||
epochMillis = localDateTime.toInstant(UTC).toEpochMilli(); | ||
} | ||
String dateTimeInString = PROLEPTIC_CALENDAR_DATE_TIME_FORMAT.get().format(new Date(epochMillis)); | ||
try { | ||
return HYBRID_CALENDAR_DATE_TIME_FORMAT.get().parse(dateTimeInString).getTime(); | ||
} | ||
catch (ParseException e) { | ||
throw new RuntimeException(e); | ||
} | ||
} | ||
|
||
static int convertProlepticDaysToHybridDays(int prolepticDays) | ||
{ | ||
LocalDate localDate = LocalDate.ofEpochDay(prolepticDays); | ||
if (localDate.isAfter(CalendarUtils.JULIAN_END_DATE) && localDate.isBefore(CalendarUtils.GREGORIAN_START_DATE)) { | ||
localDate = CalendarUtils.GREGORIAN_START_DATE; | ||
} | ||
String dateInStr = CalendarUtils.PROLEPTIC_CALENDAR_DATE_FORMAT.get().format(new Date(DAYS.toMillis(localDate.toEpochDay()))); | ||
return toHybridDaysFromString(dateInStr); | ||
} | ||
|
||
static int toHybridDaysFromString(String date) | ||
{ | ||
try { | ||
return (int) MILLISECONDS.toDays(CalendarUtils.HYBRID_CALENDAR_DATE_FORMAT.get().parse(date).getTime()); | ||
} | ||
catch (ParseException e) { | ||
throw new RuntimeException(e); | ||
} | ||
} | ||
|
||
@Test | ||
void testConvertGregorianDaysToAndFromHybridDays() | ||
{ | ||
ImmutableList<String> dates = ImmutableList.of( | ||
"0001-01-01", | ||
"1000-01-01", | ||
"1582-10-04", | ||
"1582-10-15", | ||
"1788-09-10", | ||
"1888-12-31", | ||
"1969-12-31", | ||
"1970-01-01", | ||
"2024-03-30"); | ||
|
||
dates.forEach(date -> { | ||
int julianDays = toHybridDaysFromString(date); | ||
int gregorianDays = (int) LocalDate.parse(date).toEpochDay(); | ||
assertThat(convertProlepticDaysToHybridDays(gregorianDays)).isEqualTo(julianDays); | ||
assertThat(convertDaysToProlepticGregorian(julianDays)).isEqualTo(gregorianDays); | ||
}); | ||
} | ||
|
||
@Test | ||
void testConvertHybridToProlepticDateForLeapYears() | ||
{ | ||
ImmutableMap<String, String> dates = ImmutableMap.<String, String>builder() | ||
.put("0004-02-29", "0004-02-29") | ||
.put("0100-02-29", "0100-03-01") | ||
.put("0196-02-29", "0196-02-29") | ||
.put("0200-02-29", "0200-03-01") | ||
.put("0204-02-29", "0204-02-29") | ||
.put("0400-02-29", "0400-02-29") | ||
.put("1000-02-29", "1000-03-01") | ||
.put("1200-02-29", "1200-02-29") | ||
.put("1600-02-29", "1600-02-29") | ||
.put("1700-02-29", "1700-03-01") | ||
.put("2000-02-29", "2000-02-29") | ||
.buildOrThrow(); | ||
|
||
dates.forEach((julianDate, gregDate) -> { | ||
int julianDays = toHybridDaysFromString(julianDate); | ||
int gregorianDays = (int) LocalDate.parse(gregDate).toEpochDay(); | ||
assertThat(convertDaysToProlepticGregorian(julianDays)).isEqualTo(gregorianDays); | ||
}); | ||
} | ||
|
||
@Test | ||
void testConvertDatesFromSwitchesBoarders() | ||
{ | ||
ImmutableList<String> dates = ImmutableList.<String>builder() | ||
.add("0001-01-01") | ||
.add("0100-03-01") | ||
.add("0100-03-02") | ||
.add("0200-02-28") | ||
.add("0200-03-01") | ||
.add("0300-02-28") | ||
.add("0300-03-01") | ||
.add("0500-02-27") | ||
.add("0500-02-28") | ||
.add("0600-02-26") | ||
.add("0600-02-27") | ||
.add("0700-02-25") | ||
.add("0700-02-26") | ||
.add("0900-02-24") | ||
.add("0900-02-25") | ||
.add("1000-02-23") | ||
.add("1000-02-24") | ||
.add("1100-02-22") | ||
.add("1100-02-23") | ||
.add("1300-02-21") | ||
.add("1300-02-22") | ||
.add("1400-02-20") | ||
.add("1400-02-21") | ||
.add("1500-02-19") | ||
.add("1500-02-20") | ||
.add("1582-02-04") | ||
.build(); | ||
|
||
dates.forEach(date -> { | ||
int hybridDays = toHybridDaysFromString(date); | ||
int gregorianDays = (int) LocalDate.parse(date).toEpochDay(); | ||
assertThat(convertProlepticDaysToHybridDays(gregorianDays)).isEqualTo(hybridDays); | ||
assertThat(convertDaysToProlepticGregorian(hybridDays)).isEqualTo(gregorianDays); | ||
}); | ||
} | ||
|
||
@Test | ||
void testRebaseNotExistedDatesInHybridCalendar() | ||
{ | ||
Map<String, String> dates = ImmutableMap.<String, String>builder() | ||
.put("1582-10-04", "1582-10-04") | ||
.put("1582-10-05", "1582-10-15") | ||
.put("1582-10-06", "1582-10-15") | ||
.put("1582-10-07", "1582-10-15") | ||
.put("1582-10-08", "1582-10-15") | ||
.put("1582-10-09", "1582-10-15") | ||
.put("1582-10-11", "1582-10-15") | ||
.put("1582-10-12", "1582-10-15") | ||
.put("1582-10-13", "1582-10-15") | ||
.put("1582-10-14", "1582-10-15") | ||
.put("1582-10-15", "1582-10-15") | ||
.buildOrThrow(); | ||
|
||
dates.forEach((gregDate, hybridDate) -> { | ||
int hybridDays = toHybridDaysFromString(hybridDate); | ||
int gregorianDays = (int) LocalDate.parse(gregDate).toEpochDay(); | ||
int actualHybridDays = convertProlepticDaysToHybridDays(gregorianDays); | ||
assertThat(actualHybridDays).isEqualTo(hybridDays); | ||
}); | ||
} | ||
|
||
@Test | ||
void testConvertGregorianTimestampToAndFromHybridDays() | ||
{ | ||
ImmutableList<String> timestamps = ImmutableList.of( | ||
"0001-01-01 15:15:15.123", | ||
"1000-01-01 15:15:15.123", | ||
"1582-10-04 15:15:15.123", | ||
"1582-10-15 15:15:15.123", | ||
"1788-09-10 15:15:15.123", | ||
"1888-12-31 15:15:15.123", | ||
"1969-12-31 15:15:15.123", | ||
"1970-01-01 15:15:15.123", | ||
"2024-03-30 15:15:15.123"); | ||
|
||
timestamps.forEach(timestamp -> { | ||
try { | ||
long julianMillis = HYBRID_CALENDAR_DATE_TIME_FORMAT.get().parse(timestamp).getTime(); | ||
long gregorianMillis = PROLEPTIC_CALENDAR_DATE_TIME_FORMAT.get().parse(timestamp).getTime(); | ||
assertThat(convertTimestampToProlepticGregorian(julianMillis)).isEqualTo(gregorianMillis); | ||
} | ||
catch (ParseException e) { | ||
throw new RuntimeException(e); | ||
} | ||
}); | ||
} | ||
|
||
@Test | ||
void testRebaseNotExistedTimestampInHybridCalendar() | ||
{ | ||
Map<String, String> timestamps = ImmutableMap.<String, String>builder() | ||
.put("1582-10-04 15:15:15.123", "1582-10-04 15:15:15.123") | ||
.put("1582-10-05 15:15:15.123", "1582-10-15 15:15:15.123") | ||
.put("1582-10-06 15:15:15.123", "1582-10-15 15:15:15.123") | ||
.put("1582-10-07 15:15:15.123", "1582-10-15 15:15:15.123") | ||
.put("1582-10-08 15:15:15.123", "1582-10-15 15:15:15.123") | ||
.put("1582-10-09 15:15:15.123", "1582-10-15 15:15:15.123") | ||
.put("1582-10-11 15:15:15.123", "1582-10-15 15:15:15.123") | ||
.put("1582-10-12 15:15:15.123", "1582-10-15 15:15:15.123") | ||
.put("1582-10-13 15:15:15.123", "1582-10-15 15:15:15.123") | ||
.put("1582-10-14 15:15:15.123", "1582-10-15 15:15:15.123") | ||
.put("1582-10-15 15:15:15.123", "1582-10-15 15:15:15.123") | ||
.buildOrThrow(); | ||
|
||
timestamps.forEach((gregorianTmst, hybridTmst) -> { | ||
try { | ||
long julianMillis = HYBRID_CALENDAR_DATE_TIME_FORMAT.get().parse(hybridTmst).getTime(); | ||
long gregorianMillis = PROLEPTIC_CALENDAR_DATE_TIME_FORMAT.get().parse(gregorianTmst).getTime(); | ||
assertThat(convertTimestampToHybrid(gregorianMillis)).isEqualTo(julianMillis); | ||
} | ||
catch (ParseException e) { | ||
throw new RuntimeException(e); | ||
} | ||
}); | ||
} | ||
} |
Oops, something went wrong.