Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reduce verbosity and remove colorization ref #4 #16

Merged
merged 1 commit into from
Mar 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,10 @@ RoxygenNote: 7.3.1
Imports:
cli,
rlang,
crayon,
tzdb
Suggests:
knitr,
stringi,
testthat,
withr,
hms


88 changes: 34 additions & 54 deletions R/parser.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,20 +31,26 @@ collector_find <- function(name) {
#' @family parsers
#' @param x Character vector of elements to parse.
#' @param collector Column specification.
#' @param .return_problems Whether to hide the `problems` tibble from the output
#' @keywords internal
#' @export
#' @examples
#' x <- c("1", "2", "3", "NA")
#' parse_vector(x, col_integer())
#' parse_vector(x, col_double())
parse_vector <- function(x, collector, na = c("", "NA"), locale = default_locale(), trim_ws = TRUE) {
parse_vector <- function(x, collector, na = c("", "NA"), locale = default_locale(), trim_ws = TRUE, .return_problems = FALSE) {
stopifnot(is.character(x))
if (is.character(collector)) {
collector <- collector_find(collector)
}

## warn_problems(parse_vector_(x, collector, na = na, locale_ = locale, trim_ws = trim_ws))
parse_vector_(x, collector, na = na, locale_ = locale, trim_ws = trim_ws)
res <- parse_vector_(x, collector, na = na, locale_ = locale, trim_ws = trim_ws)
if (.return_problems || is.null(attr(res, "problems"))) {
return(res)
}
attr(res, "problems") <- NULL
return(res)
}

#' Parse logicals, integers, and reals
Expand All @@ -65,6 +71,7 @@ parse_vector <- function(x, collector, na = c("", "NA"), locale = default_locale
#' names.
#' @param trim_ws Should leading and trailing whitespace (ASCII spaces and tabs) be trimmed from
#' each field before parsing it?
#' @inheritParams parse_vector
#' @family parsers
#' @examples
#' parse_integer(c("1", "2", "3"))
Expand All @@ -84,26 +91,26 @@ NULL

#' @rdname parse_atomic
#' @export
parse_logical <- function(x, na = c("", "NA"), locale = default_locale(), trim_ws = TRUE) {
parse_vector(x, col_logical(), na = na, locale = locale, trim_ws = trim_ws)
parse_logical <- function(x, na = c("", "NA"), locale = default_locale(), trim_ws = TRUE, .return_problems = FALSE) {
parse_vector(x, col_logical(), na = na, locale = locale, trim_ws = trim_ws, .return_problems = .return_problems)
}

#' @rdname parse_atomic
#' @export
parse_integer <- function(x, na = c("", "NA"), locale = default_locale(), trim_ws = TRUE) {
parse_vector(x, col_integer(), na = na, locale = locale, trim_ws = trim_ws)
parse_integer <- function(x, na = c("", "NA"), locale = default_locale(), trim_ws = TRUE, .return_problems = FALSE) {
parse_vector(x, col_integer(), na = na, locale = locale, trim_ws = trim_ws, .return_problems = .return_problems)
}

#' @rdname parse_atomic
#' @export
parse_double <- function(x, na = c("", "NA"), locale = default_locale(), trim_ws = TRUE) {
parse_vector(x, col_double(), na = na, locale = locale, trim_ws = trim_ws)
parse_double <- function(x, na = c("", "NA"), locale = default_locale(), trim_ws = TRUE, .return_problems = FALSE) {
parse_vector(x, col_double(), na = na, locale = locale, trim_ws = trim_ws, .return_problems = .return_problems)
}

#' @rdname parse_atomic
#' @export
parse_character <- function(x, na = c("", "NA"), locale = default_locale(), trim_ws = TRUE) {
parse_vector(x, col_character(), na = na, locale = locale, trim_ws = trim_ws)
parse_character <- function(x, na = c("", "NA"), locale = default_locale(), trim_ws = TRUE, .return_problems = FALSE) {
parse_vector(x, col_character(), na = na, locale = locale, trim_ws = trim_ws, .return_problems = .return_problems)
}

#' @rdname parse_atomic
Expand Down Expand Up @@ -166,8 +173,8 @@ col_skip <- function() {
#' ## Specifying strings for NAs
#' parse_number(c("1", "2", "3", "NA"))
#' parse_number(c("1", "2", "3", "NA", "Nothing"), na = c("NA", "Nothing"))
parse_number <- function(x, na = c("", "NA"), locale = default_locale(), trim_ws = TRUE) {
parse_vector(x, col_number(), na = na, locale = locale, trim_ws = trim_ws)
parse_number <- function(x, na = c("", "NA"), locale = default_locale(), trim_ws = TRUE, .return_problems = FALSE) {
parse_vector(x, col_number(), na = na, locale = locale, trim_ws = trim_ws, .return_problems = .return_problems)
}

#' @rdname parse_number
Expand Down Expand Up @@ -203,8 +210,9 @@ col_number <- function() {
#' # ISO 8601 date times
#' guess_parser(c("2010-10-10"))
#' parse_guess(c("2010-10-10"))
parse_guess <- function(x, na = c("", "NA"), locale = default_locale(), trim_ws = TRUE, guess_integer = FALSE) {
parse_vector(x, guess_parser(x, locale, guess_integer = guess_integer, na = na), na = na, locale = locale, trim_ws = trim_ws)
parse_guess <- function(x, na = c("", "NA"), locale = default_locale(), trim_ws = TRUE, guess_integer = FALSE, .return_problems = FALSE) {
parse_vector(x, guess_parser(x, locale, guess_integer = guess_integer, na = na), na = na, locale = locale, trim_ws = trim_ws,
.return_problems = .return_problems)
}

#' @rdname parse_guess
Expand Down Expand Up @@ -262,8 +270,9 @@ guess_parser <- function(x, locale = default_locale(), guess_integer = FALSE, na
#' # and reports problems
#' parse_factor(x, levels = animals)
parse_factor <- function(x, levels = NULL, ordered = FALSE, na = c("", "NA"),
locale = default_locale(), include_na = TRUE, trim_ws = TRUE) {
parse_vector(x, col_factor(levels, ordered, include_na), na = na, locale = locale, trim_ws = trim_ws)
locale = default_locale(), include_na = TRUE, trim_ws = TRUE, .return_problems = FALSE) {
parse_vector(x, col_factor(levels, ordered, include_na), na = na, locale = locale, trim_ws = trim_ws,
.return_problems = .return_problems)
}

#' @rdname parse_factor
Expand Down Expand Up @@ -401,20 +410,20 @@ col_factor <- function(levels = NULL, ordered = FALSE, include_na = FALSE) {
#' parse_datetime("1979-10-14T1010Z", locale = us_central)
#' # Your current time zone
#' parse_datetime("1979-10-14T1010", locale = locale(tz = ""))
parse_datetime <- function(x, format = "", na = c("", "NA"), locale = default_locale(), trim_ws = TRUE) {
parse_vector(x, col_datetime(format), na = na, locale = locale, trim_ws = trim_ws)
parse_datetime <- function(x, format = "", na = c("", "NA"), locale = default_locale(), trim_ws = TRUE, .return_problems = FALSE) {
parse_vector(x, col_datetime(format), na = na, locale = locale, trim_ws = trim_ws, .return_problems = .return_problems)
}

#' @rdname parse_datetime
#' @export
parse_date <- function(x, format = "", na = c("", "NA"), locale = default_locale(), trim_ws = TRUE) {
parse_vector(x, col_date(format), na = na, locale = locale, trim_ws = trim_ws)
parse_date <- function(x, format = "", na = c("", "NA"), locale = default_locale(), trim_ws = TRUE, .return_problems = FALSE) {
parse_vector(x, col_date(format), na = na, locale = locale, trim_ws = trim_ws, .return_problems = .return_problems)
}

#' @rdname parse_datetime
#' @export
parse_time <- function(x, format = "", na = c("", "NA"), locale = default_locale(), trim_ws = TRUE) {
parse_vector(x, col_time(format), na = na, locale = locale, trim_ws = trim_ws)
parse_time <- function(x, format = "", na = c("", "NA"), locale = default_locale(), trim_ws = TRUE, .return_problems = FALSE) {
parse_vector(x, col_time(format), na = na, locale = locale, trim_ws = trim_ws, .return_problems = .return_problems)
}

#' @rdname parse_datetime
Expand Down Expand Up @@ -877,8 +886,8 @@ as.character.col_spec <- function(x, ...) {
}

#' @export
print.col_spec <- function(x, n = Inf, condense = NULL, colour = crayon::has_color(), ...) {
cat(format.col_spec(x, n = n, condense = condense, colour = colour, ...))
print.col_spec <- function(x, n = Inf, condense = NULL, ...) {
cat(format.col_spec(x, n = n, condense = condense, ...))

invisible(x)
}
Expand All @@ -894,7 +903,7 @@ cols_condense <- function(x) {
}

#' @export
format.col_spec <- function(x, n = Inf, condense = NULL, colour = crayon::has_color(), ...) {
format.col_spec <- function(x, n = Inf, condense = NULL, ...) {
if (n == 0) {
return("")
}
Expand Down Expand Up @@ -929,7 +938,6 @@ format.col_spec <- function(x, n = Inf, condense = NULL, colour = crayon::has_co
args <- paste(names(args), args, sep = " = ", collapse = ", ")

col_funs <- paste0(col_funs, "(", args, ")")
col_funs <- colourise_cols(col_funs, colour)

col_names <- names(cols)[[i]] %||% ""

Expand Down Expand Up @@ -961,34 +969,6 @@ format.col_spec <- function(x, n = Inf, condense = NULL, colour = crayon::has_co
out
}

colourise_cols <- function(cols, colourise = crayon::has_color()) {
if (!isTRUE(colourise)) {
return(cols)
}

fname <- sub("[(].*", "", cols)
for (i in seq_along(cols)) {
cols[[i]] <- switch(fname,
col_skip = ,
col_guess = cols[[i]],

col_character = ,
col_factor = crayon::red(cols[[i]]),

col_logical = crayon::yellow(cols[[i]]),

col_double = ,
col_integer = ,
col_number = crayon::green(cols[[i]]),

col_date = ,
col_datetime = ,
col_time = crayon::blue(cols[[i]])
)
}
cols
}

# Used in read_delim(), read_fwf() and type_convert()
show_cols_spec <- function(spec, n = getOption("readr.num_columns", 20)) {
if (n > 0) {
Expand Down
6 changes: 4 additions & 2 deletions R/type_convert.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#' a string. See `vignette("readr")` for more details.
#'
#' If `NULL`, column types will be imputed using all rows.
#' @param verbose whether to print messages
#' @inheritParams guess_parser
#' @note `type_convert()` removes a 'spec' attribute,
#' because it likely modifies the column data types.
Expand All @@ -27,7 +28,8 @@
#' df <- data.frame(x = c("NA", "10"), stringsAsFactors = FALSE)
#' str(type_convert(df))
type_convert <- function(df, col_types = NULL, na = c("", "NA"), trim_ws = TRUE,
locale = default_locale(), guess_integer = FALSE) {
locale = default_locale(), guess_integer = FALSE,
verbose = FALSE) {
stopifnot(is.data.frame(df))
is_character <- vapply(df, is.character, logical(1))

Expand All @@ -54,7 +56,7 @@ type_convert <- function(df, col_types = NULL, na = c("", "NA"), trim_ws = TRUE,
)

## if (is.null(col_types) && !is_testing()) {
if (is.null(col_types)) {
if (is.null(col_types) && verbose) {
show_cols_spec(specs)
}

Expand Down
41 changes: 40 additions & 1 deletion README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ knitr::opts_chunk$set(

`readr`'s 1e type inferencing and parsing tools are used by various R packages, e.g. `readODS` and `surveytoolbox`, but ironically those packages do not use the main functions (e.g. `readr::read_delim()`) of `readr`. As explained in the README of `readr`, those 1e code will be eventually removed from `readr`.

`minty` aims at providing a set of minimal, long-term, and compatible type inferencing and parsing tools for those packages.
`minty` aims at providing a set of minimal, long-term, and compatible type inferencing and parsing tools for those packages. If you need to parse interactively, please use either `readr` or `vroom`.

## Installation

Expand Down Expand Up @@ -107,3 +107,42 @@ res
```{r}
str(res)
```

## Differences: `readr` vs `minty`

Unlike `readr` and `vroom`, please note that `minty` is mainly for **non-interactive usage**. Therefore, `minty` emits fewer messages and warnings than `readr` and `vroom`.

```{r}
data <- minty::type_convert(text_only)
data
```

```{r}
data <- readr::type_convert(text_only)
data
```

`verbose` option is added if you like those messages, default to `FALSE`.

```{r}
data <- minty::type_convert(text_only, verbose = TRUE)
```

At the moment, `minty` does not use [the `problems` mechanism](https://vroom.r-lib.org/reference/problems.html) by default.

```{r}
minty::parse_logical(c("true", "fake", "IDK"), na = "IDK")
```

```{r}
readr::parse_logical(c("true", "fake", "IDK"), na = "IDK")
```

## Similar packages

For parsing ambiguous date(time)

* [timeless](https://github.com/schochastics/timeless)
* [anytime](https://github.com/eddelbuettel/anytime)


Loading