Skip to content

Commit

Permalink
Optimize types.ParseTime and fix typo (hypermodeinc#4693)
Browse files Browse the repository at this point in the history
Also fixed typo in test.sh

benchmark                          old ns/op     new ns/op     delta
BenchmarkParseTime-8               6821          3822          -43.97%
BenchmarkParseTimeRejections-8     20843         5712          -72.60%

benchmark                          old allocs     new allocs     delta
BenchmarkParseTime-8               39             14             -64.10%
BenchmarkParseTimeRejections-8     197            43             -78.17%

benchmark                          old bytes     new bytes     delta
BenchmarkParseTime-8               2144          736           -65.67%
BenchmarkParseTimeRejections-8     12384         3200          -74.16%
  • Loading branch information
Alvin Ali Khaled authored Feb 6, 2020
1 parent 55b1693 commit 23c3c3e
Show file tree
Hide file tree
Showing 4 changed files with 115 additions and 56 deletions.
2 changes: 1 addition & 1 deletion test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
# - bash
# - curl
# - coreutils
# - gnu-getop
# - gnu-getopt
# - findutils
#
# Your $PATH must have all required packages in .bashrc:
Expand Down
6 changes: 3 additions & 3 deletions types/conversion_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ func TestConversionEdgeCases(t *testing.T) {
failure: "strconv.ParseBool"},
{in: Val{Tid: StringID, Value: []byte{}},
out: Val{Tid: DateTimeID, Value: time.Time{}},
failure: `parsing time "" as "2006": cannot parse "" as "2006"`},
failure: `parsing time "" as "2006-01-02T15:04:05": cannot parse "" as "2006"`},

// From IntID to X
{in: Val{Tid: IntID, Value: []byte{}},
Expand All @@ -142,12 +142,12 @@ func TestConversionEdgeCases(t *testing.T) {
// From DateTimeID to X
{in: Val{Tid: DateTimeID, Value: []byte{}},
out: Val{Tid: DateTimeID, Value: time.Time{}},
failure: "Time.UnmarshalBinary:"},
failure: "Time.UnmarshalBinary: no data"},
{in: Val{Tid: DateTimeID, Value: bs(time.Time{})},
out: Val{Tid: DateTimeID, Value: time.Time{}}},
{in: Val{Tid: DateTimeID, Value: []byte{}},
out: Val{Tid: BinaryID, Value: []byte{}},
failure: "Time.UnmarshalBinary"},
failure: "Time.UnmarshalBinary: no data"},
{in: Val{Tid: DateTimeID, Value: bs(time.Time{})},
out: Val{Tid: BinaryID, Value: bs(time.Time{})}},
{in: Val{Tid: DateTimeID, Value: []byte{}},
Expand Down
36 changes: 18 additions & 18 deletions types/scalar_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,11 @@ import (
)

const nanoSecondsInSec = 1000000000
const dateFormatY = "2006" // time.longYear
const dateFormatYM = "2006-01"
const dateFormatYMD = "2006-01-02"
const dateFormatYMDZone = "2006-01-02 15:04:05 -0700 MST"
const dateTimeFormat = "2006-01-02T15:04:05"

// Note: These ids are stored in the posting lists to indicate the type
// of the data. The order *cannot* be changed without breaking existing
Expand Down Expand Up @@ -190,28 +195,23 @@ func ValueForType(id TypeID) Val {
// ParseTime parses the time from string trying various datetime formats.
// By default, Go parses time in UTC unless specified in the data itself.
func ParseTime(val string) (time.Time, error) {
var t time.Time
if err := t.UnmarshalText([]byte(val)); err == nil {
return t, err
if len(val) == len(dateFormatY) {
return time.Parse(dateFormatY, val)
}
if t, err := time.Parse(dateFormatYMDZone, val); err == nil {
return t, err
if len(val) == len(dateFormatYM) {
return time.Parse(dateFormatYM, val)
}
// try without timezone
if t, err := time.Parse(dateTimeFormat, val); err == nil {
return t, err
if len(val) == len(dateFormatYMD) {
return time.Parse(dateFormatYMD, val)
}
if t, err := time.Parse(dateFormatYMD, val); err == nil {
return t, err
if len(val) > len(dateTimeFormat) && val[len(dateFormatYMD)] == 'T' &&
(val[len(val)-1] == 'Z' || val[len(val)-3] == ':') {
// https://tools.ietf.org/html/rfc3339#section-5.6
return time.Parse(time.RFC3339, val)
}
if t, err := time.Parse(dateFormatYM, val); err == nil {
if t, err := time.Parse(dateFormatYMDZone, val); err == nil {
return t, err
}
return time.Parse(dateFormatY, val)
// Try without timezone.
return time.Parse(dateTimeFormat, val)
}

const dateFormatYMDZone = "2006-01-02 15:04:05 -0700 MST"
const dateFormatYMD = "2006-01-02"
const dateFormatYM = "2006-01"
const dateFormatY = "2006"
const dateTimeFormat = "2006-01-02T15:04:05"
127 changes: 93 additions & 34 deletions types/scalar_types_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,65 @@ import (
"github.com/stretchr/testify/require"
)

// datesWithTz pairs inputs that carry a zone designator ("Z" or a numeric
// offset) with the time.Time that ParseTime is expected to produce for them.
var datesWithTz = []struct {
	in  string
	out time.Time
}{
	{
		in:  "2018-10-28T04:00:10Z",
		out: time.Date(2018, 10, 28, 4, 0, 10, 0, time.UTC),
	},
	{
		in:  "2018-10-28T04:00:10-00:00",
		out: time.Date(2018, 10, 28, 4, 0, 10, 0, time.UTC),
	},
	{
		// Fractional seconds followed by "Z".
		in:  "2018-05-30T09:30:10.5Z",
		out: time.Date(2018, 5, 30, 9, 30, 10, int(500*time.Millisecond), time.UTC),
	},
	{
		// Fractional seconds followed by a numeric offset.
		in:  "2018-05-30T09:30:10.5-00:00",
		out: time.Date(2018, 5, 30, 9, 30, 10, int(500*time.Millisecond), time.UTC),
	},
	{
		// Negative offset: six hours behind UTC.
		in:  "2018-05-30T09:30:10-06:00",
		out: time.Date(2018, 5, 30, 9, 30, 10, 0, time.FixedZone("", -6*60*60)),
	},
	{
		// Positive offset of thirty hours.
		in:  "2018-05-28T14:41:57+30:00",
		out: time.Date(2018, 5, 28, 14, 41, 57, 0, time.FixedZone("", 30*60*60)),
	},
}

// datesWithoutTz pairs inputs that have no zone designator with the
// time.Time that ParseTime is expected to produce for them; every expected
// value is expressed in UTC.
var datesWithoutTz = []struct {
	in  string
	out time.Time
}{
	{
		in:  "2018-10-28T04:00:10",
		out: time.Date(2018, 10, 28, 4, 0, 10, 0, time.UTC),
	},
	{
		// Fractional seconds.
		in:  "2018-05-30T09:30:10.5",
		out: time.Date(2018, 5, 30, 9, 30, 10, int(500*time.Millisecond), time.UTC),
	},
	{
		// Year only.
		in:  "2018",
		out: time.Date(2018, 1, 1, 0, 0, 0, 0, time.UTC),
	},
	{
		// Year and month.
		in:  "2018-01",
		out: time.Date(2018, 1, 1, 0, 0, 0, 0, time.UTC),
	},
	{
		// Year, month and day.
		in:  "2018-01-01",
		out: time.Date(2018, 1, 1, 0, 0, 0, 0, time.UTC),
	},
}

// invalidDates lists inputs for which ParseTime is expected to return an
// error.
var invalidDates = []string{
	// Strings without any date separators, 4 to 10 characters long.
	"abcd", "12345", "123456", "1234567",
	"12345678", "123456789", "1234567891",

	// Digit runs ending in "Z" or ":11".
	"11111111111111111Z", "111111111111111:11",

	// A date followed by 'T' and a malformed time portion.
	"1111-11-11T11:11111111:1", "1111-11-11T11:11:1111:11",

	// Years written with fewer than four digits.
	"18-10-28T04:00:10Z", "318-10-28T04:00:10",

	// Fields with extra digits or out-of-range values.
	"2018-110-28T04:00:10", "20181-4-28T25:00:10",
	"2018-10-218T04:00:10", "2018-14-28T25:00:10",
	"2018-142-8T25:00:10", "2018-05-33T09:65:10.5",

	// One character shorter or longer than the year, year-month and
	// year-month-day layouts.
	"201", "2018-011", "2018-01-011",
}

func TestTypeForName(t *testing.T) {
for name, tid := range typeNameMap {
typ, ok := TypeForName(name)
Expand All @@ -46,22 +105,7 @@ func TestValueForType(t *testing.T) {
}

func TestParseTimeWithoutTZ(t *testing.T) {
tests := []struct {
in string
out time.Time
}{
{in: "2018-10-28T04:00:10",
out: time.Date(2018, 10, 28, 4, 00, 10, 0, time.UTC)},
{in: "2018-05-30T09:30:10.5",
out: time.Date(2018, 5, 30, 9, 30, 10, 500000000, time.UTC)},
{in: "2018",
out: time.Date(2018, 1, 1, 0, 0, 0, 0, time.UTC)},
{in: "2018-01",
out: time.Date(2018, 1, 1, 0, 0, 0, 0, time.UTC)},
{in: "2018-01-01",
out: time.Date(2018, 1, 1, 0, 0, 0, 0, time.UTC)},
}
for _, tc := range tests {
for _, tc := range datesWithoutTz {
out, err := ParseTime(tc.in)
require.NoError(t, err)
require.EqualValues(t, tc.out, out)
Expand All @@ -75,26 +119,41 @@ func TestParseTimeWithTZ(t *testing.T) {
time.Local, err = time.LoadLocation("UTC")
require.NoError(t, err)

tests := []struct {
in string
out time.Time
}{
{in: "2018-10-28T04:00:10Z",
out: time.Date(2018, 10, 28, 4, 00, 10, 0, time.UTC)},
{in: "2018-10-28T04:00:10-00:00",
out: time.Date(2018, 10, 28, 4, 00, 10, 0, time.UTC)},
{in: "2018-05-30T09:30:10.5Z",
out: time.Date(2018, 5, 30, 9, 30, 10, 500000000, time.UTC)},
{in: "2018-05-30T09:30:10.5-00:00",
out: time.Date(2018, 5, 30, 9, 30, 10, 500000000, time.UTC)},
{in: "2018-05-30T09:30:10-06:00",
out: time.Date(2018, 5, 30, 9, 30, 10, 0, time.FixedZone("", -6*60*60))},
{in: "2018-05-28T14:41:57+30:00",
out: time.Date(2018, 5, 28, 14, 41, 57, 0, time.FixedZone("", 30*60*60))},
}
for _, tc := range tests {
for _, tc := range datesWithTz {
out, err := ParseTime(tc.in)
require.NoError(t, err)
require.EqualValues(t, tc.out, out)
}
}

// TestParseTimeRejection verifies that ParseTime returns an error for every
// entry of invalidDates.
func TestParseTimeRejection(t *testing.T) {
	// Set local time to UTC. The assignment happens before the error check,
	// exactly as a combined multi-value assignment would do.
	utc, err := time.LoadLocation("UTC")
	time.Local = utc
	require.NoError(t, err)

	for i := range invalidDates {
		_, parseErr := ParseTime(invalidDates[i])
		require.Error(t, parseErr)
	}
}

// BenchmarkParseTime runs ParseTime over every input in datesWithTz and
// datesWithoutTz on each iteration. Return values are discarded.
func BenchmarkParseTime(b *testing.B) {
	for iter := 0; iter < b.N; iter++ {
		for j := range datesWithTz {
			ParseTime(datesWithTz[j].in)
		}
		for j := range datesWithoutTz {
			ParseTime(datesWithoutTz[j].in)
		}
	}
}

// BenchmarkParseTimeRejections runs ParseTime over every input in
// invalidDates on each iteration. Return values are discarded.
func BenchmarkParseTimeRejections(b *testing.B) {
	for iter := 0; iter < b.N; iter++ {
		for j := range invalidDates {
			ParseTime(invalidDates[j])
		}
	}
}

0 comments on commit 23c3c3e

Please sign in to comment.