diff --git a/DESCRIPTION b/DESCRIPTION index e8d100c..2bb97b1 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: minty Title: Minimal Type Guesser -Version: 0.0.1 +Version: 0.0.2 Authors@R: c( person("Chung-hong", "Chan", role = c("aut", "cre"), email = "chainsawtiney@gmail.com", comment = c(ORCID = "0000-0002-6232-7530")), person("Hadley", "Wickham", , "hadley@posit.co", role = "aut", comment = "author of the ported code from readr"), diff --git a/README.Rmd b/README.Rmd index d195f7e..01a6ef8 100644 --- a/README.Rmd +++ b/README.Rmd @@ -140,6 +140,17 @@ minty::parse_logical(c("true", "fake", "IDK"), na = "IDK") readr::parse_logical(c("true", "fake", "IDK"), na = "IDK") ``` +Some features from `vroom` have been ported to `minty` but are not available in `readr`. + +```{r} +## tidyverse/readr#1526 +minty::type_convert(data.frame(a=c("NaN", "Inf", "-INF"))) |> str() +``` + +```{r} +readr::type_convert(data.frame(a=c("NaN", "Inf", "-INF"))) |> str() +``` + ## Similar packages For parsing ambiguous date(time) diff --git a/README.md b/README.md index 05d6c8e..56b17a0 100644 --- a/README.md +++ b/README.md @@ -208,6 +208,26 @@ readr::parse_logical(c("true", "fake", "IDK"), na = "IDK") #> 1 2 NA 1/0/T/F/TRUE/FALSE fake ``` +Some features from `vroom` have been ported to `minty` but are not available in `readr`. + +``` r +## tidyverse/readr#1526 +minty::type_convert(data.frame(a=c("NaN", "Inf", "-INF"))) |> str() +#> 'data.frame': 3 obs. of 1 variable: +#> $ a: num NaN Inf -Inf +``` + +``` r +readr::type_convert(data.frame(a=c("NaN", "Inf", "-INF"))) |> str() +#> +#> ── Column specification ──────────────────────────────────────────────────────── +#> cols( +#> a = col_character() +#> ) +#> 'data.frame': 3 obs. 
of 1 variable: +#> $ a: chr "NaN" "Inf" "-INF" +``` + ## Similar packages For parsing ambiguous date(time) diff --git a/src/QiParsers.h b/src/QiParsers.h index 128aea8..1428b6f 100644 --- a/src/QiParsers.h +++ b/src/QiParsers.h @@ -2,6 +2,7 @@ #define FASTREAD_QI_PARSERS #include "Rinternals.h" +#include <cctype> // tolower /* An STL iterator-based string to floating point number conversion. @@ -17,10 +18,10 @@ bsd_strtod(const char* begin, const char** endptr, const char decimal_mark) { if (begin == *endptr) { return NA_REAL; } - if (*begin == 'n' || *begin == '?') { - *endptr = begin; - return NA_REAL; - } + // if (*begin == 'n' || *begin == '?') { + // *endptr = begin; + // return NA_REAL; + // } int sign = 0, expSign = 0, i; double fraction, dblExp; const char* p; @@ -93,6 +94,18 @@ bsd_strtod(const char* begin, const char** endptr, const char decimal_mark) { } else if (p != *endptr && *p == '+') ++p; + // Code ported from vroom + /* NaN */ + if (*endptr - p == 3 && tolower(p[0]) == 'n' && tolower(p[1]) == 'a' && + tolower(p[2]) == 'n') { + return NAN; + } + /* Inf */ + if (*endptr - p == 3 && tolower(p[0]) == 'i' && tolower(p[1]) == 'n' && + tolower(p[2]) == 'f') { + return sign == 1 ? 
-HUGE_VAL : HUGE_VAL; + } + /* If we don't have a digit or decimal point something is wrong, so return an * NA */ if (!(isdigit(*p) || *p == decimal_mark)) { diff --git a/tests/testthat/test-parsing-numeric.R b/tests/testthat/test-parsing-numeric.R index 0411120..841a6e7 100644 --- a/tests/testthat/test-parsing-numeric.R +++ b/tests/testthat/test-parsing-numeric.R @@ -127,3 +127,25 @@ test_that("scientific notation is parsed properly", { expect_equal(parse_number("-17E-5-5"), -0.00017) expect_equal(parse_number("1.2E-3"), 0.0012) }) + +## Inf, NaN and NA handling in parse_double(); ref gesistsa/minty#11 + +test_that("special cases", { + ## Inf: matched case-insensitively, sign respected, trailing characters give NA + expect_equal(parse_double("Inf"), Inf) + expect_equal(parse_double("INF"), Inf) + expect_equal(parse_double("-inf"), -Inf) + expect_equal(parse_double("-Inf"), -Inf) + expect_equal(parse_double("Infa"), NA_real_) + ## NaN: matched case-insensitively, sign ignored, trailing characters give NA + expect_equal(parse_double("NAN"), NaN) + expect_equal(parse_double("Nan"), NaN) + expect_equal(parse_double("-nan"), NaN) + expect_equal(parse_double("-NaN"), NaN) + expect_equal(parse_double("nana"), NA_real_) + ## NA + expect_equal(parse_double("NA"), NA_real_) + expect_equal(parse_double("Naan"), NA_real_) ## "Naan" is neither "NA" nor "NaN" + ## integration: NA, NaN, Inf and an ordinary number in one vector + expect_equal(parse_double(c("NA", "NaN", "Inf", "3.14")), c(NA_real_, NaN, Inf, 3.14)) +})