Skip to content

Commit

Permalink
Add support for legacy Date in Hive for Parquet
Browse files Browse the repository at this point in the history
  • Loading branch information
marcinsbd committed Jan 14, 2025
1 parent ad38de4 commit 1d8213c
Show file tree
Hide file tree
Showing 13 changed files with 1,081 additions and 4 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.trino.plugin.base.util;

import com.google.common.collect.Range;
import com.google.common.collect.RangeMap;
import com.google.common.collect.TreeRangeMap;

import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.time.LocalDate;
import java.util.Date;
import java.util.GregorianCalendar;
import java.util.Map;
import java.util.Optional;
import java.util.TimeZone;

import static java.time.ZoneOffset.UTC;
import static java.util.concurrent.TimeUnit.DAYS;
import static java.util.concurrent.TimeUnit.MILLISECONDS;

/**
 * Rebases day counts and timestamps from the hybrid Julian/Gregorian calendar (the default
 * behavior of {@link GregorianCalendar}, which switches calendars on 1582-10-15) to the
 * proleptic Gregorian calendar.
 * <p>
 * The rebase keeps the calendar "label" (era, year, month, day, time of day) and changes the
 * underlying epoch value. A label that does not exist in the proleptic Gregorian calendar
 * (for example the Julian leap day 0100-02-29) is resolved leniently to the following day.
 */
public final class CalendarUtils
{
    static final LocalDate GREGORIAN_START_DATE = LocalDate.of(1582, 10, 15);
    static final LocalDate JULIAN_END_DATE = LocalDate.of(1582, 10, 4);

    private static final TimeZone TZ_UTC = TimeZone.getTimeZone(UTC);
    private static final String DATE_FORMAT = "yyyy-MM-dd";
    private static final String DATE_TIME_FORMAT = "yyyy-MM-dd HH:mm:ss.SSS";

    // Calendar fields that together make up the label carried over by a rebase
    private static final int[] LABEL_FIELDS = {
            GregorianCalendar.ERA,
            GregorianCalendar.YEAR,
            GregorianCalendar.MONTH,
            GregorianCalendar.DAY_OF_MONTH,
            GregorianCalendar.HOUR_OF_DAY,
            GregorianCalendar.MINUTE,
            GregorianCalendar.SECOND,
            GregorianCalendar.MILLISECOND,
    };

    // SimpleDateFormat and GregorianCalendar are mutable and not thread-safe, hence one instance per thread
    static final ThreadLocal<SimpleDateFormat> HYBRID_CALENDAR_DATE_FORMAT =
            ThreadLocal.withInitial(() -> newFormat(DATE_FORMAT, newHybridCalendar()));

    static final ThreadLocal<SimpleDateFormat> HYBRID_CALENDAR_DATE_TIME_FORMAT =
            ThreadLocal.withInitial(() -> newFormat(DATE_TIME_FORMAT, newHybridCalendar()));

    static final ThreadLocal<SimpleDateFormat> PROLEPTIC_CALENDAR_DATE_FORMAT =
            ThreadLocal.withInitial(() -> newFormat(DATE_FORMAT, newProlepticCalendar()));

    static final ThreadLocal<SimpleDateFormat> PROLEPTIC_CALENDAR_DATE_TIME_FORMAT =
            ThreadLocal.withInitial(() -> newFormat(DATE_TIME_FORMAT, newProlepticCalendar()));

    private static final ThreadLocal<GregorianCalendar> HYBRID_CALENDAR = ThreadLocal.withInitial(CalendarUtils::newHybridCalendar);
    private static final ThreadLocal<GregorianCalendar> PROLEPTIC_CALENDAR = ThreadLocal.withInitial(CalendarUtils::newProlepticCalendar);

    // Number of days to add to a hybrid epoch day to obtain the proleptic Gregorian epoch day, keyed by hybrid epoch day.
    // https://en.wikipedia.org/wiki/Proleptic_Gregorian_calendar#Difference_between_Julian_and_proleptic_Gregorian_calendar_dates
    private static final RangeMap<Integer, Integer> julianGregorianDiffs = TreeRangeMap.create();

    // Hybrid epoch day of 0001-01-01; earlier days are not covered by the lookup table
    private static final int JULIAN_COMMON_ERA_START_DAY;
    // Epoch day of 1582-10-15; from this day on both calendars agree
    private static final long LAST_SWITCH_JULIAN_DAY;

    static {
        julianGregorianDiffs.put(Range.lessThan(-682945), 2);
        julianGregorianDiffs.put(Range.closedOpen(-682945, -646420), 1);
        julianGregorianDiffs.put(Range.closedOpen(-646420, -609895), 0);
        julianGregorianDiffs.put(Range.closedOpen(-609895, -536845), -1);
        julianGregorianDiffs.put(Range.closedOpen(-536845, -500320), -2);
        julianGregorianDiffs.put(Range.closedOpen(-500320, -463795), -3);
        julianGregorianDiffs.put(Range.closedOpen(-463795, -390745), -4);
        julianGregorianDiffs.put(Range.closedOpen(-390745, -354220), -5);
        julianGregorianDiffs.put(Range.closedOpen(-354220, -317695), -6);
        julianGregorianDiffs.put(Range.closedOpen(-317695, -244645), -7);
        julianGregorianDiffs.put(Range.closedOpen(-244645, -208120), -8);
        julianGregorianDiffs.put(Range.closedOpen(-208120, -171595), -9);
        julianGregorianDiffs.put(Range.closedOpen(-171595, -141427), -10);
        julianGregorianDiffs.put(Range.atLeast(-141427), 0);

        JULIAN_COMMON_ERA_START_DAY = hybridEpochDay("0001-01-01");
        LAST_SWITCH_JULIAN_DAY = hybridEpochDay("1582-10-15");
    }

    private CalendarUtils() {}

    /**
     * Rebases a day count expressed in the hybrid Julian/Gregorian calendar to the proleptic
     * Gregorian calendar.
     *
     * @param julianDays days since 1970-01-01 in the hybrid calendar
     * @return days since 1970-01-01 in the proleptic Gregorian calendar for the same date label
     */
    public static int convertDaysToProlepticGregorian(int julianDays)
    {
        if (julianDays < JULIAN_COMMON_ERA_START_DAY) {
            // Before 0001-01-01 the lookup table does not apply; rebase through the calendars
            return convertDaysToProlepticDaysInternal(julianDays);
        }
        else if (julianDays < LAST_SWITCH_JULIAN_DAY) {
            return Optional.ofNullable(julianGregorianDiffs.getEntry(julianDays)).map(Map.Entry::getValue).orElse(0) + julianDays;
        }
        // On and after 1582-10-15 the hybrid calendar is already Gregorian
        return julianDays;
    }

    /**
     * Rebases a timestamp expressed in the hybrid Julian/Gregorian calendar to the proleptic
     * Gregorian calendar, interpreting it in UTC.
     *
     * @param epochMillis milliseconds since 1970-01-01T00:00:00Z in the hybrid calendar
     * @return milliseconds since 1970-01-01T00:00:00Z in the proleptic Gregorian calendar for the same label
     */
    public static long convertTimestampToProlepticGregorian(long epochMillis)
    {
        if (epochMillis >= DAYS.toMillis(LAST_SWITCH_JULIAN_DAY)) {
            // Both calendars are Gregorian from the cutover on, so the value is unchanged
            return epochMillis;
        }
        return rebaseHybridToProleptic(epochMillis);
    }

    private static int convertDaysToProlepticDaysInternal(int hybridDays)
    {
        return (int) MILLISECONDS.toDays(rebaseHybridToProleptic(DAYS.toMillis(hybridDays)));
    }

    /**
     * Reads the label of {@code hybridMillis} in the hybrid calendar and resolves the same
     * label in the proleptic Gregorian calendar.
     * <p>
     * The era is copied explicitly: going through the {@code yyyy-MM-dd} text form would drop
     * it and turn every BC date into an AD date.
     */
    private static long rebaseHybridToProleptic(long hybridMillis)
    {
        GregorianCalendar hybrid = HYBRID_CALENDAR.get();
        hybrid.setTimeInMillis(hybridMillis);

        GregorianCalendar proleptic = PROLEPTIC_CALENDAR.get();
        // Drop whatever the previous conversion on this thread left behind
        proleptic.clear();
        for (int field : LABEL_FIELDS) {
            proleptic.set(field, hybrid.get(field));
        }
        // The calendar is lenient, so a label missing from the Gregorian calendar rolls forward
        return proleptic.getTimeInMillis();
    }

    // Epoch day of a yyyy-MM-dd date interpreted in the hybrid calendar
    private static int hybridEpochDay(String date)
    {
        try {
            return (int) MILLISECONDS.toDays(HYBRID_CALENDAR_DATE_FORMAT.get().parse(date).getTime());
        }
        catch (ParseException e) {
            throw new IllegalStateException("Invalid calendar boundary date: " + date, e);
        }
    }

    private static SimpleDateFormat newFormat(String pattern, GregorianCalendar calendar)
    {
        SimpleDateFormat format = new SimpleDateFormat(pattern);
        format.setCalendar(calendar);
        return format;
    }

    // Julian before 1582-10-15 and Gregorian afterwards, in UTC
    private static GregorianCalendar newHybridCalendar()
    {
        return new GregorianCalendar(TZ_UTC);
    }

    // Gregorian rules for all of time, in UTC
    private static GregorianCalendar newProlepticCalendar()
    {
        GregorianCalendar calendar = new GregorianCalendar(TZ_UTC);
        calendar.setGregorianChange(new Date(Long.MIN_VALUE));
        return calendar;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,240 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.trino.plugin.base.util;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import org.junit.jupiter.api.Test;

import java.text.ParseException;
import java.time.Instant;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.util.Date;
import java.util.Map;

import static io.trino.plugin.base.util.CalendarUtils.HYBRID_CALENDAR_DATE_TIME_FORMAT;
import static io.trino.plugin.base.util.CalendarUtils.PROLEPTIC_CALENDAR_DATE_TIME_FORMAT;
import static io.trino.plugin.base.util.CalendarUtils.convertDaysToProlepticGregorian;
import static io.trino.plugin.base.util.CalendarUtils.convertTimestampToProlepticGregorian;
import static java.time.ZoneOffset.UTC;
import static java.util.concurrent.TimeUnit.DAYS;
import static java.util.concurrent.TimeUnit.MILLISECONDS;
import static org.assertj.core.api.Assertions.assertThat;

/**
 * Tests for {@link CalendarUtils}. The reverse (proleptic Gregorian to hybrid) conversions
 * needed by the round-trip checks are implemented here as test-only helpers.
 */
class TestCalendarUtils
{
    /**
     * Rebases a proleptic Gregorian timestamp to the hybrid calendar. Dates falling into the
     * 1582-10-05..1582-10-14 gap, which does not exist in the hybrid calendar, are moved to
     * 1582-10-15 while keeping the time of day.
     */
    public static long convertTimestampToHybrid(long epochMillis)
    {
        LocalDateTime localDateTime = LocalDateTime.ofInstant(Instant.ofEpochMilli(epochMillis), UTC);
        if (isInCutoverGap(localDateTime.toLocalDate())) {
            localDateTime = LocalDateTime.of(CalendarUtils.GREGORIAN_START_DATE, localDateTime.toLocalTime());
            epochMillis = localDateTime.toInstant(UTC).toEpochMilli();
        }
        String dateTimeInString = PROLEPTIC_CALENDAR_DATE_TIME_FORMAT.get().format(new Date(epochMillis));
        try {
            return HYBRID_CALENDAR_DATE_TIME_FORMAT.get().parse(dateTimeInString).getTime();
        }
        catch (ParseException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Rebases a proleptic Gregorian epoch day to the hybrid calendar, moving dates from the
     * cutover gap to 1582-10-15.
     */
    static int convertProlepticDaysToHybridDays(int prolepticDays)
    {
        LocalDate localDate = LocalDate.ofEpochDay(prolepticDays);
        if (isInCutoverGap(localDate)) {
            localDate = CalendarUtils.GREGORIAN_START_DATE;
        }
        String dateInStr = CalendarUtils.PROLEPTIC_CALENDAR_DATE_FORMAT.get().format(new Date(DAYS.toMillis(localDate.toEpochDay())));
        return toHybridDaysFromString(dateInStr);
    }

    /**
     * Parses a {@code yyyy-MM-dd} date in the hybrid calendar and returns its epoch day.
     */
    static int toHybridDaysFromString(String date)
    {
        try {
            return (int) MILLISECONDS.toDays(CalendarUtils.HYBRID_CALENDAR_DATE_FORMAT.get().parse(date).getTime());
        }
        catch (ParseException e) {
            throw new RuntimeException(e);
        }
    }

    // True for the dates strictly between the last Julian day and the first Gregorian day
    private static boolean isInCutoverGap(LocalDate date)
    {
        return date.isAfter(CalendarUtils.JULIAN_END_DATE) && date.isBefore(CalendarUtils.GREGORIAN_START_DATE);
    }

    @Test
    void testConvertGregorianDaysToAndFromHybridDays()
    {
        ImmutableList<String> dates = ImmutableList.of(
                "0001-01-01",
                "1000-01-01",
                "1582-10-04",
                "1582-10-15",
                "1788-09-10",
                "1888-12-31",
                "1969-12-31",
                "1970-01-01",
                "2024-03-30");

        for (String date : dates) {
            int julianDays = toHybridDaysFromString(date);
            int gregorianDays = (int) LocalDate.parse(date).toEpochDay();
            assertThat(convertProlepticDaysToHybridDays(gregorianDays)).isEqualTo(julianDays);
            assertThat(convertDaysToProlepticGregorian(julianDays)).isEqualTo(gregorianDays);
        }
    }

    @Test
    void testConvertHybridToProlepticDateForLeapYears()
    {
        // Julian leap days that the Gregorian calendar lacks are expected to resolve to March 1st
        ImmutableMap<String, String> dates = ImmutableMap.<String, String>builder()
                .put("0004-02-29", "0004-02-29")
                .put("0100-02-29", "0100-03-01")
                .put("0196-02-29", "0196-02-29")
                .put("0200-02-29", "0200-03-01")
                .put("0204-02-29", "0204-02-29")
                .put("0400-02-29", "0400-02-29")
                .put("1000-02-29", "1000-03-01")
                .put("1200-02-29", "1200-02-29")
                .put("1600-02-29", "1600-02-29")
                .put("1700-02-29", "1700-03-01")
                .put("2000-02-29", "2000-02-29")
                .buildOrThrow();

        for (Map.Entry<String, String> entry : dates.entrySet()) {
            int julianDays = toHybridDaysFromString(entry.getKey());
            int gregorianDays = (int) LocalDate.parse(entry.getValue()).toEpochDay();
            assertThat(convertDaysToProlepticGregorian(julianDays)).isEqualTo(gregorianDays);
        }
    }

    @Test
    void testConvertDatesFromSwitchesBorders()
    {
        ImmutableList<String> dates = ImmutableList.<String>builder()
                .add("0001-01-01")
                .add("0100-03-01")
                .add("0100-03-02")
                .add("0200-02-28")
                .add("0200-03-01")
                .add("0300-02-28")
                .add("0300-03-01")
                .add("0500-02-27")
                .add("0500-02-28")
                .add("0600-02-26")
                .add("0600-02-27")
                .add("0700-02-25")
                .add("0700-02-26")
                .add("0900-02-24")
                .add("0900-02-25")
                .add("1000-02-23")
                .add("1000-02-24")
                .add("1100-02-22")
                .add("1100-02-23")
                .add("1300-02-21")
                .add("1300-02-22")
                .add("1400-02-20")
                .add("1400-02-21")
                .add("1500-02-19")
                .add("1500-02-20")
                .add("1582-02-04")
                .build();

        for (String date : dates) {
            int hybridDays = toHybridDaysFromString(date);
            int gregorianDays = (int) LocalDate.parse(date).toEpochDay();
            assertThat(convertProlepticDaysToHybridDays(gregorianDays)).isEqualTo(hybridDays);
            assertThat(convertDaysToProlepticGregorian(hybridDays)).isEqualTo(gregorianDays);
        }
    }

    @Test
    void testRebaseNotExistedDatesInHybridCalendar()
    {
        // Every day of the 1582-10-05..1582-10-14 gap, plus the two days surrounding it
        Map<String, String> dates = ImmutableMap.<String, String>builder()
                .put("1582-10-04", "1582-10-04")
                .put("1582-10-05", "1582-10-15")
                .put("1582-10-06", "1582-10-15")
                .put("1582-10-07", "1582-10-15")
                .put("1582-10-08", "1582-10-15")
                .put("1582-10-09", "1582-10-15")
                .put("1582-10-10", "1582-10-15")
                .put("1582-10-11", "1582-10-15")
                .put("1582-10-12", "1582-10-15")
                .put("1582-10-13", "1582-10-15")
                .put("1582-10-14", "1582-10-15")
                .put("1582-10-15", "1582-10-15")
                .buildOrThrow();

        for (Map.Entry<String, String> entry : dates.entrySet()) {
            int gregorianDays = (int) LocalDate.parse(entry.getKey()).toEpochDay();
            int expectedHybridDays = toHybridDaysFromString(entry.getValue());
            assertThat(convertProlepticDaysToHybridDays(gregorianDays)).isEqualTo(expectedHybridDays);
        }
    }

    @Test
    void testConvertGregorianTimestampToAndFromHybridDays()
            throws ParseException
    {
        ImmutableList<String> timestamps = ImmutableList.of(
                "0001-01-01 15:15:15.123",
                "1000-01-01 15:15:15.123",
                "1582-10-04 15:15:15.123",
                "1582-10-15 15:15:15.123",
                "1788-09-10 15:15:15.123",
                "1888-12-31 15:15:15.123",
                "1969-12-31 15:15:15.123",
                "1970-01-01 15:15:15.123",
                "2024-03-30 15:15:15.123");

        for (String timestamp : timestamps) {
            long julianMillis = HYBRID_CALENDAR_DATE_TIME_FORMAT.get().parse(timestamp).getTime();
            long gregorianMillis = PROLEPTIC_CALENDAR_DATE_TIME_FORMAT.get().parse(timestamp).getTime();
            assertThat(convertTimestampToProlepticGregorian(julianMillis)).isEqualTo(gregorianMillis);
        }
    }

    @Test
    void testRebaseNotExistedTimestampInHybridCalendar()
            throws ParseException
    {
        // Every day of the 1582-10-05..1582-10-14 gap, plus the two days surrounding it
        Map<String, String> timestamps = ImmutableMap.<String, String>builder()
                .put("1582-10-04 15:15:15.123", "1582-10-04 15:15:15.123")
                .put("1582-10-05 15:15:15.123", "1582-10-15 15:15:15.123")
                .put("1582-10-06 15:15:15.123", "1582-10-15 15:15:15.123")
                .put("1582-10-07 15:15:15.123", "1582-10-15 15:15:15.123")
                .put("1582-10-08 15:15:15.123", "1582-10-15 15:15:15.123")
                .put("1582-10-09 15:15:15.123", "1582-10-15 15:15:15.123")
                .put("1582-10-10 15:15:15.123", "1582-10-15 15:15:15.123")
                .put("1582-10-11 15:15:15.123", "1582-10-15 15:15:15.123")
                .put("1582-10-12 15:15:15.123", "1582-10-15 15:15:15.123")
                .put("1582-10-13 15:15:15.123", "1582-10-15 15:15:15.123")
                .put("1582-10-14 15:15:15.123", "1582-10-15 15:15:15.123")
                .put("1582-10-15 15:15:15.123", "1582-10-15 15:15:15.123")
                .buildOrThrow();

        for (Map.Entry<String, String> entry : timestamps.entrySet()) {
            long gregorianMillis = PROLEPTIC_CALENDAR_DATE_TIME_FORMAT.get().parse(entry.getKey()).getTime();
            long julianMillis = HYBRID_CALENDAR_DATE_TIME_FORMAT.get().parse(entry.getValue()).getTime();
            assertThat(convertTimestampToHybrid(gregorianMillis)).isEqualTo(julianMillis);
        }
    }
}
Loading

0 comments on commit 1d8213c

Please sign in to comment.