From 8234e7cf57809167fdaaa927571148234426f01f Mon Sep 17 00:00:00 2001 From: Emanuele Guidotti Date: Tue, 24 Dec 2024 23:29:16 +0100 Subject: [PATCH] Fix #15 --- README.md | 7 +- r/DESCRIPTION | 17 ++-- r/LICENSE | 2 + r/NAMESPACE | 5 +- r/R/ar.R | 29 +++--- r/R/cs.R | 42 ++++---- r/R/edge.R | 188 +++++++++++++++++++++++++++++------ r/R/ohlc.R | 97 +++++++++--------- r/R/roll.R | 32 +++--- r/R/sim.R | 46 +++++---- r/R/spread.R | 95 +++++++++++------- r/R/utils.R | 57 ++++++----- r/README.md | 67 +++++++++++-- r/man/AR.Rd | 2 +- r/man/CS.Rd | 2 +- r/man/OHLC.Rd | 2 +- r/man/ROLL.Rd | 2 +- r/man/bidask-package.Rd | 2 +- r/man/edge_expanding.Rd | 50 ++++++++++ r/man/edge_rolling.Rd | 64 ++++++++++++ r/man/rfun.Rd | 12 +++ r/man/rmean.Rd | 2 +- r/man/rsum.Rd | 6 +- r/man/sim.Rd | 16 +-- r/man/spread.Rd | 47 ++++++--- r/tests/testthat/test-edge.R | 121 +++++++++++++++++++--- r/vignettes/bidask.Rmd | 89 ++++++++++------- 27 files changed, 785 insertions(+), 316 deletions(-) create mode 100644 r/LICENSE create mode 100644 r/man/edge_expanding.Rd create mode 100644 r/man/edge_rolling.Rd create mode 100644 r/man/rfun.Rd diff --git a/README.md b/README.md index f11e183..6b0db4b 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices -This [repository](https://github.com/eguidotti/bidask/) implements an efficient estimator of the effective bid-ask spread from open, high, low, and close prices as described in: +This [repository](https://github.com/eguidotti/bidask/) implements the efficient estimator of the effective bid-ask spread from open, high, low, and close prices described in: > Ardia, D., Guidotti, E., Kroencke, T.A. (2024). Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices. *Journal of Financial Economics*, 161, 103916. [doi: 10.1016/j.jfineco.2024.103916](https://doi.org/10.1016/j.jfineco.2024.103916) @@ -33,10 +33,7 @@ You can browse publications related to the paper [here](https://scholar.google.c ## Terms of use -All code is released under the [GPL-3.0](https://github.com/eguidotti/bidask/?tab=GPL-3.0-1-ov-file#GPL-3.0-1-ov-file) license. All data are released under the [CC BY 4.0](http://creativecommons.org/licenses/by/4.0) license. When using any data or code from this repository, you agree to: - -- cite [Ardia, Guidotti, & Kroencke (2024)](https://doi.org/10.1016/j.jfineco.2024.103916) as indicated below -- place the link [https://github.com/eguidotti/bidask](https://github.com/eguidotti/bidask) in a footnote to help others find this repository +All code is released under the [MIT](https://github.com/eguidotti/bidask?tab=MIT-1-ov-file#readme) license. All data are released under the [CC BY 4.0](http://creativecommons.org/licenses/by/4.0) license. When using any data or code from this repository, please cite the reference indicated below. ## Cite as diff --git a/r/DESCRIPTION b/r/DESCRIPTION index be9f975..13ff9f2 100644 --- a/r/DESCRIPTION +++ b/r/DESCRIPTION @@ -7,22 +7,25 @@ Authors@R: c( person(given = "David", family = "Ardia", role = c("ctb"), comment = c(ORCID = "0000-0003-2823-782X")), person(given = "Tim", family = "Kroencke", role = c("ctb"), comment = c(ORCID = "0000-0001-8700-356X")) ) -Description: Implements an efficient estimator of bid-ask spreads from open, high, low, and close prices - as described in Ardia, Guidotti, & Kroencke (2024) . +Description: Implements the efficient estimator of bid-ask spreads from open, high, low, and close prices + described in Ardia, Guidotti, & Kroencke (JFE, 2024) . It also provides an implementation of the estimators described in - Roll (1984) , - Corwin & Schultz (2012) , - and Abdi & Ranaldo (2017) . -License: GPL-3 + Roll (JF, 1984) , + Corwin & Schultz (JF, 2012) , + and Abdi & Ranaldo (RFS, 2017) . +License: MIT + file LICENSE URL: https://github.com/eguidotti/bidask BugReports: https://github.com/eguidotti/bidask/issues Encoding: UTF-8 -Imports: xts, zoo +Imports: data.table RoxygenNote: 7.2.3 Suggests: + xts, + zoo, dplyr, crypto2, quantmod, + ggplot2, knitr, rmarkdown, testthat (>= 3.0.0) diff --git a/r/LICENSE b/r/LICENSE new file mode 100644 index 0000000..d7d69c3 --- /dev/null +++ b/r/LICENSE @@ -0,0 +1,2 @@ +YEAR: 2024 +COPYRIGHT HOLDER: Emanuele Guidotti \ No newline at end of file diff --git a/r/NAMESPACE b/r/NAMESPACE index 57dc8c1..2db4e87 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -1,9 +1,10 @@ # Generated by roxygen2: do not edit by hand export(edge) +export(edge_expanding) +export(edge_rolling) export(sim) export(spread) -import(xts) -importFrom(stats,lag) +import(data.table) importFrom(stats,rbinom) importFrom(stats,rnorm) diff --git a/r/R/ar.R b/r/R/ar.R index b9ae23a..b8dda02 100644 --- a/r/R/ar.R +++ b/r/R/ar.R @@ -2,37 +2,40 @@ #' #' @keywords internal #' -AR <- function(x, width = nrow(x), method, sign, na.rm){ +AR <- function(high, low, close, width, method, sign, na.rm){ ok <- c("AR","AR2") if(length(ko <- setdiff(method, ok))) stop(sprintf("Method(s) '%s' not available. The available methods are '%s'.", paste(ko, collapse = "', '"), paste(ok, collapse = "', '"))) - x <- log(x) + h <- log(high) + l <- log(low) + c <- log(close) - M2 <- (x$HIGH+x$LOW)/2 - M1 <- lag(M2, 1)[-1,] - C1 <- lag(x$CLOSE, 1) + m2 <- (h + l) / 2 + m1 <- shift(m2, 1) + c1 <- shift(c, 1) - S2 <- 4*(C1-M1)*(C1-M2) + s2 <- 4 * (c1 - m1) * (c1 - m2) + shift <- 1 ar <- ar2 <- NULL if("AR" %in% method) { - ar <- rmean(S2, width = width-1, na.rm = na.rm) + ar <- rmean(s2, width = width, shift = shift, na.rm = na.rm) ar <- sign(ar) * sqrt(abs(ar)) if(!sign) ar <- abs(ar) - colnames(ar) <- "AR" + ar <- list("AR" = ar) } if("AR2" %in% method){ - S2[S2<0] <- 0 - S <- sqrt(S2) - ar2 <- rmean(S, width = width-1, na.rm = na.rm) - colnames(ar2) <- "AR2" + s2[s2 < 0] <- 0 + s <- sqrt(s2) + ar2 <- rmean(s, width = width, shift = shift, na.rm = na.rm) + ar2 <- list("AR2" = ar2) } - return(cbind(ar, ar2)) + return(c(ar, ar2)) } diff --git a/r/R/cs.R b/r/R/cs.R index d110eda..27fe043 100644 --- a/r/R/cs.R +++ b/r/R/cs.R @@ -2,46 +2,46 @@ #' #' @keywords internal #' -CS <- function(x, width = nrow(x), method, sign, na.rm){ +CS <- function(high, low, close, width, method, sign, na.rm){ ok <- c("CS","CS2") if(length(ko <- setdiff(method, ok))) stop(sprintf("Method(s) '%s' not available. The available methods are '%s'.", paste(ko, collapse = "', '"), paste(ok, collapse = "', '"))) - x <- log(x) + h <- log(high) + l <- log(low) + c <- log(close) - H <- x$HIGH[-1] - L <- x$LOW[-1] + c1 <- shift(c, 1) + h1 <- shift(h, 1) + l1 <- shift(l, 1) - C1 <- lag(x$CLOSE, 1)[-1] - H1 <- lag(x$HIGH, 1)[-1] - L1 <- lag(x$LOW, 1)[-1] - - GAP <- pmax(0, C1-H) + pmin(0, C1-L) - AH <- H + GAP - AL <- L + GAP + gap <- pmax(0, c1 - h) + pmin(0, c1 - l) + ah <- h + gap + al <- l + gap - B <- (H-L)^2 + (H1-L1)^2 - G <- (pmax(AH, H1) - pmin(AL, L1))^2 + b <- (h - l)^2 + (h1 - l1)^2 + g <- (pmax(ah, h1) - pmin(al, l1))^2 - A <- (sqrt(2*B)-sqrt(B))/(3-2*sqrt(2)) - sqrt(G/(3-2*sqrt(2))) - S <- 2*(exp(A)-1)/(1+exp(A)) + a <- (sqrt(2*b) - sqrt(b)) / (3 - 2*sqrt(2)) - sqrt(g / (3 - 2*sqrt(2))) + s <- 2*(exp(a) - 1) / (1 + exp(a)) + shift <- 1 cs <- cs2 <- NULL if("CS" %in% method) { - cs <- rmean(S, width = width-1, na.rm = na.rm) + cs <- rmean(s, width = width, shift = shift, na.rm = na.rm) if(!sign) cs <- abs(cs) - colnames(cs) <- "CS" + cs <- list("CS" = cs) } if("CS2" %in% method){ - S[S<0] <- 0 - cs2 <- rmean(S, width = width-1, na.rm = na.rm) - colnames(cs2) <- "CS2" + s[s < 0] <- 0 + cs2 <- rmean(s, width = width, shift = shift, na.rm = na.rm) + cs2 <- list("CS2" = cs2) } - return(cbind(cs, cs2)) + return(c(cs, cs2)) } diff --git a/r/R/edge.R b/r/R/edge.R index 6c6d49a..5f13eb3 100644 --- a/r/R/edge.R +++ b/r/R/edge.R @@ -2,21 +2,20 @@ #' #' @keywords internal #' -EDGE <- function(x, width = nrow(x), sign, na.rm){ +EDGE <- function(open, high, low, close, width, sign, na.rm, aslist = TRUE){ # compute log-prices - x <- log(x) - o <- x$OPEN - h <- x$HIGH - l <- x$LOW - c <- x$CLOSE + o <- log(open) + h <- log(high) + l <- log(low) + c <- log(close) m <- (h + l) / 2 # shift log-prices by one period - h1 <- lag(h, 1) - l1 <- lag(l, 1) - c1 <- lag(c, 1) - m1 <- lag(m, 1) + h1 <- shift(h, 1) + l1 <- shift(l, 1) + c1 <- shift(c, 1) + m1 <- shift(m, 1) # compute log-returns r1 <- m - o @@ -26,11 +25,11 @@ EDGE <- function(x, width = nrow(x), sign, na.rm){ r5 <- o - c1 # compute indicator variables - tau <- h != l | l != c1 - po1 <- tau & o != h - po2 <- tau & o != l - pc1 <- tau & c1 != h1 - pc2 <- tau & c1 != l1 + tau <- ifelse(is.na(h) | is.na(l) | is.na(c1), NA, h != l | l != c1) + po1 <- tau * (o != h) + po2 <- tau * (o != l) + pc1 <- tau * (c1 != h1) + pc2 <- tau * (c1 != l1) # compute base products for rolling means r12 <- r1 * r2 @@ -43,7 +42,7 @@ EDGE <- function(x, width = nrow(x), sign, na.rm){ tr5 <- tau * r5 # set up data frame for rolling means - x <- cbind( + x <- data.frame( r12, r34, r15, @@ -82,11 +81,11 @@ EDGE <- function(x, width = nrow(x), sign, na.rm){ # mask the first observation and decrement width by 1 before # computing rolling means to account for lagged prices - x <- x[-1] - width = width - 1 - + x[1,] <- NA + shift <- 1 + # compute rolling means - m <- rmean(x, width = width, na.rm = na.rm) + m <- rmean(x, width = width, shift = shift, na.rm = na.rm) # compute probabilities pt <- m[,5] @@ -95,8 +94,8 @@ EDGE <- function(x, width = nrow(x), sign, na.rm){ # set to missing if there are less than two periods with tau=1 # or po or pc is zero - nt <- rsum(x[,5], width = width, na.rm = TRUE) - m[nt < 2 | (!is.nan(po) & po == 0) | (!is.nan(pc) & pc == 0)] <- NaN + nt <- rsum(x[5], width = width, shift = shift, na.rm = TRUE) + m[which(nt < 2 | po == 0 | pc == 0),] <- NA # compute input vectors a1 <- -4. / po @@ -131,16 +130,16 @@ EDGE <- function(x, width = nrow(x), sign, na.rm){ # compute square spread by using a (equally) weighted # average if the total variance is (not) positive vt <- v1 + v2 - s2 <- ifelse(!is.na(vt) & vt > 0, (v2*e1 + v1*e2) / vt, (e1 + e2) / 2.) - colnames(s2) <- "EDGE" - + s2 <- ifelse(!is.na(vt) & vt > 0, (v2*e1 + v1*e2) / vt, (e1 + e2) / 2) + # compute signed root s <- sqrt(abs(s2)) if(sign) s <- s * base::sign(s2) # return the spread - return(s) + if(!aslist) return(s) + return(list("EDGE" = s)) } @@ -183,7 +182,7 @@ edge <- function(open, high, low, close, sign = FALSE){ # return missing if there are less than 3 observations if(n < 3) - return(NaN) + return(NA) # compute log-prices o <- log(as.numeric(open)) @@ -204,11 +203,11 @@ edge <- function(open, high, low, close, sign = FALSE){ r5 <- o - c1 # compute indicator variables - tau <- h != l | l != c1 - po1 <- tau & o != h - po2 <- tau & o != l - pc1 <- tau & c1 != h1 - pc2 <- tau & c1 != l1 + tau <- ifelse(is.na(h) | is.na(l) | is.na(c1), NA, h != l | l != c1) + po1 <- tau * (o != h) + po2 <- tau * (o != l) + pc1 <- tau * (c1 != h1) + pc2 <- tau * (c1 != l1) # compute probabilities pt <- mean(tau, na.rm = TRUE) @@ -219,7 +218,7 @@ edge <- function(open, high, low, close, sign = FALSE){ # or po or pc is zero nt <- sum(tau, na.rm = TRUE) if(nt < 2 | (!is.nan(po) & po == 0) | (!is.nan(pc) & pc == 0)) - return(NaN) + return(NA) # compute de-meaned log-returns d1 <- r1 - mean(r1, na.rm = TRUE)/pt*tau @@ -255,3 +254,126 @@ edge <- function(open, high, low, close, sign = FALSE){ return(s) } + +#' Rolling Estimates of Bid-Ask Spreads from Open, High, Low, and Close Prices +#' +#' Implements a rolling window calculation of the efficient estimator of bid-ask spreads +#' from open, high, low, and close prices described in Ardia, Guidotti, & Kroencke (JFE, 2024): +#' \doi{10.1016/j.jfineco.2024.103916}. +#' +#' @details +#' Prices must be sorted in ascending order of the timestamp. +#' +#' @param open numeric vector of open prices. +#' @param high numeric vector of high prices. +#' @param low numeric vector of low prices. +#' @param close numeric vector of close prices. +#' @param width if an integer, the width of the rolling window. If a vector with the same length of the input prices, the width of the window corresponding to each observation. Otherwise, a vector of endpoints. See examples. +#' @param sign whether to return signed estimates. +#' @param na.rm whether to ignore missing values. +#' +#' @return Vector of spread estimates. +#' A value of 0.01 corresponds to a spread of 1\%. +#' This function always returns a result of the same length as the input prices. +#' +#' @references +#' Ardia, D., Guidotti, E., Kroencke, T.A. (2024). Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices. Journal of Financial Economics, 161, 103916. +#' \doi{10.1016/j.jfineco.2024.103916} +#' +#' @examples +#' # simulate open, high, low, and close prices with spread 1% +#' x <- sim(spread = 0.01) +#' +#' # estimate the spread using a rolling window +#' s <- edge_rolling(x$Open, x$High, x$Low, x$Close, width = 21) +#' tail(s) +#' +#' # estimate the spread using custom endpoints +#' ep <- c(3, 35, 100) +#' s <- edge_rolling(x$Open, x$High, x$Low, x$Close, width = ep) +#' s[c(35, 100)] +#' # equivalent to +#' edge(x$Open[3:35], x$High[3:35], x$Low[3:35], x$Close[3:35]) +#' edge(x$Open[35:100], x$High[35:100], x$Low[35:100], x$Close[35:100]) +#' +#' # estimate the spread using an expanding window +#' s <- edge_rolling(x$Open, x$High, x$Low, x$Close, width = 1:nrow(x)) +#' tail(s) +#' # equivalent to +#' s <- edge_expanding(x$Open, x$High, x$Low, x$Close, na.rm = FALSE) +#' tail(s) +#' +#' @export +#' +edge_rolling <- function(open, high, low, close, width, sign = FALSE, na.rm = FALSE){ + n <- length(open) + if(length(high) != n | length(low) != n | length(close) != n) + stop("open, high, low, close must have the same length") + + EDGE( + open = as.numeric(open), + high = as.numeric(high), + low = as.numeric(low), + close = as.numeric(close), + width = width, + sign = sign, + na.rm = na.rm, + aslist = FALSE + ) + +} + +#' Expanding Estimates of Bid-Ask Spreads from Open, High, Low, and Close Prices +#' +#' Implements an expanding window calculation of the efficient estimator of bid-ask spreads +#' from open, high, low, and close prices described in Ardia, Guidotti, & Kroencke (JFE, 2024): +#' \doi{10.1016/j.jfineco.2024.103916}. +#' +#' @details +#' Prices must be sorted in ascending order of the timestamp. +#' +#' @param open numeric vector of open prices. +#' @param high numeric vector of high prices. +#' @param low numeric vector of low prices. +#' @param close numeric vector of close prices. +#' @param sign whether to return signed estimates. +#' @param na.rm whether to ignore missing values. +#' +#' @return Vector of spread estimates. +#' A value of 0.01 corresponds to a spread of 1\%. +#' This function always returns a result of the same length as the input prices. +#' +#' @references +#' Ardia, D., Guidotti, E., Kroencke, T.A. (2024). Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices. Journal of Financial Economics, 161, 103916. +#' \doi{10.1016/j.jfineco.2024.103916} +#' +#' @examples +#' # simulate open, high, low, and close prices with spread 1% +#' x <- sim(spread = 0.01) +#' +#' # estimate the spread using an expanding window +#' s <- edge_expanding(x$Open, x$High, x$Low, x$Close) +#' tail(s) +#' # equivalent to +#' s <- edge_rolling(x$Open, x$High, x$Low, x$Close, width = 1:nrow(x), na.rm = TRUE) +#' tail(s) +#' +#' @export +#' +edge_expanding <- function(open, high, low, close, sign = FALSE, na.rm = TRUE){ + n <- length(open) + if(length(high) != n | length(low) != n | length(close) != n) + stop("open, high, low, close must have the same length") + + EDGE( + open = as.numeric(open), + high = as.numeric(high), + low = as.numeric(low), + close = as.numeric(close), + width = 1:n, + sign = sign, + na.rm = na.rm, + aslist = FALSE + ) + +} diff --git a/r/R/ohlc.R b/r/R/ohlc.R index 030f16e..9ffc0da 100644 --- a/r/R/ohlc.R +++ b/r/R/ohlc.R @@ -2,68 +2,73 @@ #' #' @keywords internal #' -OHLC <- function(x, width = nrow(x), method, sign, na.rm){ - - methods <- strsplit(method, split = ".", fixed = TRUE) - - m <- unique(unlist(methods)) - p <- unique(unlist(strsplit(m, split = ""))) +OHLC <- function(open, high, low, close, width, method, sign, na.rm){ + splitmethods <- strsplit(method, split = ".", fixed = TRUE) + uniquemethods <- unique(unlist(splitmethods)) ok <- c("OHL","OHLC","CHL","CHLO") - if(length(ko <- setdiff(m, ok))) + if(length(ko <- setdiff(uniquemethods, ok))) stop(sprintf( "Method(s) '%s' not available. The available methods include '%s', or any combination of them, e.g. 'OHLC.CHLO'.", paste(ko, collapse = "', '"), paste(ok, collapse = "', '") )) - x <- log(x) - - O <- x$OPEN - C <- x$CLOSE - H <- x$HIGH - L <- x$LOW - M <- (H+L)/2 + o <- log(open) + h <- log(high) + l <- log(low) + c <- log(close) + m <- (h + l) / 2 - C1 <- lag(C) - H1 <- lag(H) - L1 <- lag(L) - M1 <- lag(M) + c1 <- shift(c, 1) + h1 <- shift(h, 1) + l1 <- shift(l, 1) + m1 <- shift(m, 1) - tau <- (H!=L | L!=C1)[-1] - pt <- rmean(tau, width = width-1, na.rm = na.rm) + if(length(c1) == 0) c1 <- rep(NA, length(h)) + tau <- ifelse(is.na(h) | is.na(l), NA, h != l | l != c1) + tau[1] <- NA - if("OHL" %in% m | "OHLC" %in% m) - po <- rmean((O!=H & tau) + (O!=L & tau), width = width-1, na.rm = na.rm) - if("CHL" %in% m | "CHLO" %in% m) - pc <- rmean((C1!=H1 & tau) + (C1!=L1 & tau), width = width-1, na.rm = na.rm) + shift <- 1 + pt <- rmean(tau, width = width, shift = shift, na.rm = na.rm) + nt <- rsum(tau, width = width, shift = shift, na.rm = TRUE) - s2 <- function(r1, r2, pi){ - x <- cbind(r1*r2, r1, tau*r2)[-1] - m <- rmean(x, width = width-1, na.rm = na.rm) - -8 / pi * (m[,1] - (m[,2] * m[,3]) / pt) + if("OHL" %in% uniquemethods | "OHLC" %in% uniquemethods){ + po1 <- rmean(tau * (o != h), width = width, shift = shift, na.rm = na.rm) + po2 <- rmean(tau * (o != l), width = width, shift = shift, na.rm = na.rm) + po <- po1 + po2 } - if("OHL" %in% m) - S2.OHL <- s2(M-O, O-M1, po) - if("OHLC" %in% m) - S2.OHLC <- s2(M-O, O-C1, po) - if("CHL" %in% m) - S2.CHL <- s2(M-C1, C1-M1, pc) - if("CHLO" %in% m) - S2.CHLO <- s2(O-C1, C1-M1, pc) - - S2 <- NULL - for(m in methods){ - expr <- sprintf("(%s)/%s", paste0("S2.", m, collapse = "+"), length(m)) - S2 <- cbind(S2, eval(parse(text = expr))) + if("CHL" %in% uniquemethods | "CHLO" %in% uniquemethods){ + pc1 <- rmean(tau * (c1 != h1), width = width, shift = shift, na.rm = na.rm) + pc2 <- rmean(tau * (c1 != l1), width = width, shift = shift, na.rm = na.rm) + pc <- pc1 + pc2 } - S2[is.infinite(S2)] <- NaN - colnames(S2) <- method + s2 <- function(r1, r2, pi){ + x <- data.frame(r1*r2, r1, tau*r2); x[1,] <- NA + m <- rmean(x, width = width, shift = shift, na.rm = na.rm) + m[which(nt < 2 | pi == 0),] <- NA + -8 / pi * (m[,1] - (m[,2] * m[,3]) / pt) + } - S <- sign(S2) * sqrt(abs(S2)) - if(!sign) S <- abs(S) + if("OHL" %in% uniquemethods) + s2.OHL <- s2(m - o, o - m1, po) + if("OHLC" %in% uniquemethods) + s2.OHLC <- s2(m - o, o - c1, po) + if("CHL" %in% uniquemethods) + s2.CHL <- s2(m - c1, c1 - m1, pc) + if("CHLO" %in% uniquemethods) + s2.CHLO <- s2(o - c1, c1 - m1, pc) + + s <- lapply(splitmethods, function(m){ + expr <- sprintf("(%s)/%s", paste0("s2.", m, collapse = "+"), length(m)) + s2 <- eval(parse(text = expr)) + s <- sqrt(abs(s2)) + if(sign) s <- s * base::sign(s2) + return(s) + }) - return(S) + names(s) <- method + return(s) } diff --git a/r/R/roll.R b/r/R/roll.R index b4d2083..9c7ce6c 100644 --- a/r/R/roll.R +++ b/r/R/roll.R @@ -2,28 +2,24 @@ #' #' @keywords internal #' -ROLL <- function(x, width = nrow(x), sign, na.rm){ +ROLL <- function(close, width, sign, na.rm){ - x <- log(x) + c <- log(close) + c1 <- shift(c, 1) + c2 <- shift(c, 2) - C <- x$CLOSE - C1 <- lag(C, 1) - C2 <- lag(C, 2) + r1 <- c - c1 + r2 <- c1 - c2 - R1 <- C - C1 - R2 <- C1 - C2 + shift <- 2 + x <- data.frame(r1, r2, r1*r2) + m <- rmean(x, width = width, shift = shift, na.rm = na.rm) + n <- rsum(!is.na(r2), width = width, shift = shift, na.rm = na.rm) - N <- xts::xts(!is.na(R2), order.by = zoo::index(R2))[-(1:2)] + s2 <- -4 * n/(n - 1) * (m[,3] - m[,1]*m[,2]) + s <- base::sign(s2) * sqrt(abs(s2)) + if(!sign) s <- abs(s) - m <- rmean(cbind(R1, R2, R1*R2)[-(1:2),], width = width-2, na.rm = na.rm) - n <- rsum(N, width = width-2, na.rm = na.rm) - - S2 <- -4 * n/(n-1) * (m[,3] - m[,1]*m[,2]) - colnames(S2) <- "ROLL" - - S <- sign(S2) * sqrt(abs(S2)) - if(!sign) S <- abs(S) - - return(S) + return(list("ROLL" = s)) } diff --git a/r/R/sim.R b/r/R/sim.R index 0128d72..5fd4e49 100644 --- a/r/R/sim.R +++ b/r/R/sim.R @@ -1,6 +1,6 @@ #' Simulation of Open, High, Low, and Close Prices #' -#' This function performs simulations consisting of \code{n} periods (e.g., days) and where each period consists of a given number of \code{trades}. +#' This function performs simulations consisting of \code{n} periods and where each period consists of a given number of \code{trades}. #' For each trade, the actual price \eqn{P_t} is simulated as \eqn{P_t = P_{t-1}e^{\sigma x}}, where \eqn{\sigma} is the standard deviation per trade and \eqn{x} is a random draw from a unit normal distribution. #' The standard deviation per trade equals the \code{volatility} divided by the square root of the number of \code{trades}. #' Trades are assumed to be observed with a given \code{probability}. @@ -16,19 +16,20 @@ #' @param volatility the open-to-close volatility. #' @param overnight the close-to-open volatility. #' @param drift the expected return per period. -#' @param units the units of the time period. One of: \code{sec}, \code{min}, \code{hour}, \code{day}, \code{week}, \code{month}, \code{year}. +#' @param units the units of the time period. One of: \code{1}, \code{sec}, \code{min}, \code{hour}, \code{day}, \code{week}, \code{month}, \code{year}. #' @param sign whether to return positive prices for buys and negative prices for sells. #' -#' @return Simulated open, high, low, and close prices. -#' -#' @note -#' Please cite Ardia, Guidotti, & Kroencke (2024) -#' when using this package in publication. +#' @return A data.frame of open, high, low, and close prices if \code{units=1} (default). +#' Otherwise, an \code{xts} object is returned (requires the \code{xts} package to be installed). #' #' @references #' Ardia, D., Guidotti, E., Kroencke, T.A. (2024). Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices. Journal of Financial Economics, 161, 103916. #' \doi{10.1016/j.jfineco.2024.103916} #' +#' @examples +#' # simulate 10 open, high, low, and close prices with spread 1% +#' sim(n = 10, spread = 0.01) +#' #' @export #' sim <- function( @@ -39,14 +40,14 @@ sim <- function( volatility = 0.03, overnight = 0, drift = 0, - units = "day", + units = 1, sign = FALSE){ # sanitize units if(units == "minute") units <- "min" # check units - valid <- c("sec","min","hour","day","week","month","year") + valid <- c(1, "sec", "min", "hour", "day", "week", "month", "year") if(!(units %in% valid)) stop(sprintf("units must be one of '%s'", paste(valid, collapse = "','"))) @@ -65,9 +66,8 @@ sim <- function( p <- exp(cumsum(r)) * (1 + z) # signed prices - if(sign){ - p <- p * sign(z) - } + if(sign) + p <- p * base::sign(z) # subset observations keep <- as.logical(rbinom(m, size = 1, prob = prob)) @@ -92,16 +92,18 @@ sim <- function( prev <- obs[last] } - # get time - now <- Sys.time() - if(!(units %in% c("sec","min","hour"))) - now <- as.Date(now) + if(units == 1){ + ohlc <- as.data.frame(ohlc) + } + else { + now <- Sys.time() + if(!(units %in% c("sec", "min", "hour"))) + now <- as.Date(now) + time <- seq(now, length = n, by = units) + ohlc <- xts::xts(ohlc, order.by = time) + } - # return OHLC - time <- seq(now, length = n, by = units) - p <- xts::xts(ohlc, order.by = time) - cn <- c("Open", "High", "Low", "Close") - colnames(p) <- cn - return(p) + colnames(ohlc) <- c("Open", "High", "Low", "Close") + return(ohlc) } diff --git a/r/R/spread.R b/r/R/spread.R index e203ab0..9464454 100644 --- a/r/R/spread.R +++ b/r/R/spread.R @@ -1,27 +1,29 @@ #' Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices #' #' This function implements several methods to estimate bid-ask spreads -#' from open, high, low, and close prices. +#' from open, high, low, and close prices and it is optimized for fast +#' calculations over rolling and expanding windows. #' #' @details -#' The method \code{EDGE} implements the Efficient Discrete Generalized Estimator described in Ardia, Guidotti, & Kroencke (2024). +#' The method \code{EDGE} implements the Efficient Discrete Generalized Estimator described in Ardia, Guidotti, & Kroencke (JFE, 2024). #' -#' The methods \code{OHL}, \code{OHLC}, \code{CHL}, \code{CHLO} implement the generalized estimators described in Ardia, Guidotti, & Kroencke (2024). +#' The methods \code{OHL}, \code{OHLC}, \code{CHL}, \code{CHLO} implement the generalized estimators described in Ardia, Guidotti, & Kroencke (JFE, 2024). #' They can be combined by concatenating their identifiers, e.g., \code{OHLC.CHLO} uses an average of the \code{OHLC} and \code{CHLO} estimators. #' -#' The method \code{AR} implements the estimator described in Abdi & Ranaldo (2017). \code{AR2} implements their 2-period version. +#' The method \code{AR} implements the estimator described in Abdi & Ranaldo (RFS, 2017). \code{AR2} implements their 2-period version. #' -#' The method \code{CS} implements the estimator described in Corwin & Schultz (2012). \code{CS2} implements their 2-period version. Both versions are adjusted for overnight (close-to-open) returns as described in the paper. +#' The method \code{CS} implements the estimator described in Corwin & Schultz (JF, 2012). \code{CS2} implements their 2-period version. Both versions are adjusted for overnight (close-to-open) returns as described in the paper. #' -#' The method \code{ROLL} implements the estimator described in Roll (1984). +#' The method \code{ROLL} implements the estimator described in Roll (JF, 1984). #' -#' @param x \code{\link[xts]{xts}} object with columns named \code{Open}, \code{High}, \code{Low}, \code{Close}. -#' @param width integer width of the rolling window to use, or vector of endpoints defining the intervals to use. By default, the whole time series is used to compute a single spread estimate. -#' @param method the estimator(s) to use. See details. +#' @param x tabular data with columns named \code{open}, \code{high}, \code{low}, \code{close} (case-insensitive). +#' @param width if an integer, the width of the rolling window. If a vector with the same length of the input prices, the width of the window corresponding to each observation. Otherwise, a vector of endpoints. By default, the full sample is used to compute a single spread estimate. See examples. +#' @param method the estimators to use. See details. #' @param sign whether to return signed estimates. #' @param na.rm whether to ignore missing values. #' -#' @return Time series of spread estimates. A value of 0.01 corresponds to a spread of 1\%. +#' @return A data.frame of spread estimates, or an \code{xts} object if \code{x} is of class \code{xts}. +#' A value of 0.01 corresponds to a spread of 1\%. #' #' @references #' Ardia, D., Guidotti, E., Kroencke, T.A. (2024). Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices. Journal of Financial Economics, 161, 103916. @@ -42,16 +44,29 @@ #' #' # estimate the spread #' spread(x) -#' -#' # by default this is equivalent to +#' # equivalent to #' edge(x$Open, x$High, x$Low, x$Close) #' #' # estimate the spread using a rolling window of 21 periods -#' spread(x, width = 21) -#' -#' # estimate the spread for each month -#' ep <- xts::endpoints(x, on = "months") +#' s <- spread(x, width = 21) +#' tail(s) +#' # equivalent to +#' s <- edge_rolling(x$Open, x$High, x$Low, x$Close, width = 21) +#' tail(s) +#' +#' # estimate the spread using an expanding window +#' s <- spread(x, width = 1:nrow(x)) +#' tail(s) +#' # equivalent to +#' s <- edge_expanding(x$Open, x$High, x$Low, x$Close, na.rm = FALSE) +#' tail(s) +#' +#' # estimate the spread using custom endpoints +#' ep <- c(3, 35, 100) #' spread(x, width = ep) +#' # equivalent to +#' edge(x$Open[3:35], x$High[3:35], x$Low[3:35], x$Close[3:35]) +#' edge(x$Open[35:100], x$High[35:100], x$Low[35:100], x$Close[35:100]) #' #' # use multiple estimators #' spread(x, method = c("EDGE", "AR", "CS", "ROLL", "OHLC", "OHL.CHL")) @@ -60,50 +75,58 @@ #' spread <- function(x, width = nrow(x), method = "EDGE", sign = FALSE, na.rm = FALSE){ - if(!is.xts(x)) - stop("x must be a xts object") - - if(nrow(x) < 3) - stop("x must contain at least 3 observations") + s <- list() + todo <- method <- toupper(method) + colnames(x) <- tolower(gsub("^(.*\\b)(Open|High|Low|Close)$", "\\2", colnames(x))) - method <- toupper(method) - colnames(x) <- toupper(gsub("^(.*\\b)(Open|High|Low|Close)$", "\\2", colnames(x))) + open <- as.numeric(x$open) + high <- as.numeric(x$high) + low <- as.numeric(x$low) + close <- as.numeric(x$close) - S <- NULL - x <- x[,intersect(colnames(x), c("OPEN", "HIGH", "LOW", "CLOSE"))] - - todo <- method - m <- "EDGE" if(m %in% todo){ - S <- cbind(S, EDGE(x, width = width, sign = sign, na.rm = na.rm)) + s <- c(s, EDGE(open, high, low, close, width, sign, na.rm)) todo <- setdiff(todo, m) } - m <- c("AR","AR2") + m <- c("AR", "AR2") if(any(m %in% todo)){ m <- intersect(todo, m) - S <- cbind(S, AR(x, width = width, method = m, sign = sign, na.rm = na.rm)) + s <- c(s, AR(high, low, close, width, m, sign, na.rm)) todo <- setdiff(todo, m) } - m <- c("CS","CS2") + m <- c("CS", "CS2") if(any(m %in% todo)){ m <- intersect(todo, m) - S <- cbind(S, CS(x, width = width, method = m, sign = sign, na.rm = na.rm)) + s <- c(s, CS(high, low, close, width, m, sign, na.rm)) todo <- setdiff(todo, m) } m <- "ROLL" if(m %in% todo){ - S <- cbind(S, ROLL(x, width = width, sign = sign, na.rm = na.rm)) + s <- c(s, ROLL(close, width, sign, na.rm)) todo <- setdiff(todo, m) } if(length(todo)){ - S <- cbind(S, OHLC(x, width = width, method = todo, sign = sign, na.rm = na.rm)) + s <- c(s, OHLC(open, high, low, close, width, todo, sign, na.rm)) } - return(S[,method]) + s <- as.data.frame(s, row.names = rownames(x)) + if(requireNamespace("xts", quietly = TRUE) & + requireNamespace("zoo", quietly = TRUE) + ){ + if(xts::is.xts(x)){ + s <- xts::xts(s, order.by = zoo::index(x)) + } + } + + nw <- length(width) + if(nw == 1) s <- s[-(1:pmax(1, width - 1)), , drop = FALSE] + else if(nw != nrow(x)) s <- s[width[-1], , drop = FALSE] + + return(s[, method, drop = FALSE]) } diff --git a/r/R/utils.R b/r/R/utils.R index 26a129c..a66a8fe 100644 --- a/r/R/utils.R +++ b/r/R/utils.R @@ -1,28 +1,46 @@ #' @keywords internal "_PACKAGE" -#' @import xts -#' @importFrom stats lag rbinom rnorm +#' @import data.table +#' @importFrom stats rbinom rnorm NULL -#' Rolling sum +#' Rolling function #' #' @keywords internal #' -rsum <- function(x, width, na.rm){ +rfun <- function(froll, x, width, shift, na.rm){ - if(length(width) == 1 && width < 1){ - width <- 1 - x[] <- NaN + nw <- length(width) + nc <- ncol(x); nr <- nrow(x) + if(is.null(nr)) nr <- length(x) + + n <- width - shift + if(nw != 1 && nw != nr){ + n <- rep(0, nr) + n[width[-1]] <- diff(pmax(1, width)) + } + + if(nw == 1 && n < 1){ + if(is.null(nc)) y <- rep(NA, nr) + else y <- as.data.frame(matrix(data = NA, nrow = nr, ncol = nc)) + } + else{ + y <- froll(x, n = n, na.rm = na.rm, adaptive = nw > 1, fill = NA) + if(is.list(y)) setDF(y) } - if(length(width) == 1 && width == nrow(x)) - width <- c(0, width) + return(y) - if(length(width) > 1) - return(xts::period.apply(x, INDEX = width[width>=0], FUN = colSums, na.rm = na.rm)) +} - return(zoo::rollsumr(x, k = width, na.rm = na.rm)) +#' #' Rolling sum +#' +#' @keywords internal +#' +rsum <- function(x, width, shift, na.rm){ + + rfun(frollsum, x, width, shift, na.rm) } @@ -30,19 +48,8 @@ rsum <- function(x, width, na.rm){ #' #' @keywords internal #' -rmean <- function(x, width, na.rm){ +rmean <- function(x, width, shift, na.rm){ - if(length(width) == 1 && width < 1){ - width <- 1 - x[] <- NaN - } + rfun(frollmean, x, width, shift, na.rm) - if(length(width) == 1 && width == nrow(x)) - width <- c(0, width) - - if(length(width) > 1) - return(xts::period.apply(x, INDEX = width[width>=0], FUN = colMeans, na.rm = na.rm)) - - return(zoo::rollmeanr(x, k = width, na.rm = na.rm)) - } diff --git a/r/README.md b/r/README.md index e41c41d..11ff675 100644 --- a/r/README.md +++ b/r/README.md @@ -10,7 +10,7 @@ install.packages("bidask") ## Usage -There are three functions in this package. The function `edge` computes a single bid-ask spread estimate from vectors of open, high, low, and close prices. The function `spread` is optimized for fast calculations over rolling windows, and implements additional estimators. The function `sim` simulates a time series of open, high, low, and close prices. The full [documentation](https://CRAN.R-project.org/package=bidask/bidask.pdf) is available on [CRAN](https://cran.r-project.org/package=bidask). A [vignette](https://cran.r-project.org/package=bidask/vignettes/bidask.html) is also available. +This package implements the following functions. The function `edge` computes a single bid-ask spread estimate from vectors of open, high, low, and close prices. The functions `edge_rolling` and `edge_expanding` are optimized for fast calculations over rolling and expanding windows, respectively. The function `spread` provides additional functionalities for `xts` objects and implements additional estimators. The function `sim` simulates a time series of open, high, low, and close prices. The main functions are presented below. The full [documentation](https://CRAN.R-project.org/package=bidask/bidask.pdf) is available on [CRAN](https://cran.r-project.org/package=bidask) and a [vignette](https://cran.r-project.org/package=bidask/vignettes/bidask.html) is also available. ```R library("bidask") @@ -32,17 +32,72 @@ edge(open, high, low, close, sign=FALSE) | `close` | Numeric vector of close prices. | | `sign` | Whether to return signed estimates. | -### Functions `spread` and `sim` +### Function: `edge_rolling` -For more information about these functions, see the [documentation](https://CRAN.R-project.org/package=bidask/bidask.pdf). +Implements a rolling window calculation of `edge`. The output is a vector of rolling spread estimates. A value of 0.01 corresponds to a spread of 1%. This function always returns a result of the same length as the input prices. -## Example +```R +edge_rolling(open, high, low, close, width, sign=FALSE, na.rm=FALSE) +``` + +| field | description | +| ------- | ------------------------------------------------------------ | +| `open` | Numeric vector of open prices. | +| `high` | Numeric vector of high prices. | +| `low` | Numeric vector of low prices. | +| `close` | Numeric vector of close prices. | +| `width` | If an integer, the width of the rolling window. If a vector with the same length of the input prices, the width of the window corresponding to each observation. Otherwise, a vector of endpoints. See examples. | +| `sign` | Whether to return signed estimates. | +| `na.rm` | Whether to ignore missing values. | + +### Function: `edge_expanding` + +Implements an expanding window calculation of `edge`. The output is a vector of expanding spread estimates. A value of 0.01 corresponds to a spread of 1%. This function always returns a result of the same length as the input prices. + +```R +edge_expanding(open, high, low, close, sign=FALSE, na.rm=TRUE) +``` + +| field | description | +| ------- | ----------------------------------- | +| `open` | Numeric vector of open prices. | +| `high` | Numeric vector of high prices. | +| `low` | Numeric vector of low prices. | +| `close` | Numeric vector of close prices. | +| `sign` | Whether to return signed estimates. | +| `na.rm` | Whether to ignore missing values. | + +## Examples + +Load the test data. ```R library("bidask") +x = read.csv("https://raw.githubusercontent.com/eguidotti/bidask/main/pseudocode/ohlc.csv") +``` + +Compute the spread estimate using all the observations. + +```R +edge(x$Open, x$High, x$Low, x$Close) +``` + +Compute rolling estimates using a window of 21 observations. + +```R +edge_rolling(x$Open, x$High, x$Low, x$Close, width = 21) +``` + +Estimate the spread using custom endpoints. + +```R +edge_rolling(x$Open, x$High, x$Low, x$Close, width = c(3, 35, 100)) +``` -df = read.csv("https://raw.githubusercontent.com/eguidotti/bidask/main/pseudocode/ohlc.csv") -edge(df$Open, df$High, df$Low, df$Close) +Estimate the spread using an expanding window + +```R +edge_expanding(x$Open, x$High, x$Low, x$Close, na.rm = FALSE) ``` ## Cite as diff --git a/r/man/AR.Rd b/r/man/AR.Rd index 9a12883..6cc3b3f 100644 --- a/r/man/AR.Rd +++ b/r/man/AR.Rd @@ -4,7 +4,7 @@ \alias{AR} \title{Abdi-Ranaldo Estimator} \usage{ -AR(x, width = nrow(x), method, sign, na.rm) +AR(high, low, close, width, method, sign, na.rm) } \description{ Abdi-Ranaldo Estimator diff --git a/r/man/CS.Rd b/r/man/CS.Rd index 77735dd..9ddd3d1 100644 --- a/r/man/CS.Rd +++ b/r/man/CS.Rd @@ -4,7 +4,7 @@ \alias{CS} \title{Corwin-Schultz Estimator} \usage{ -CS(x, width = nrow(x), method, sign, na.rm) +CS(high, low, close, width, method, sign, na.rm) } \description{ Corwin-Schultz Estimator diff --git a/r/man/OHLC.Rd b/r/man/OHLC.Rd index d904a0f..51ee614 100644 --- a/r/man/OHLC.Rd +++ b/r/man/OHLC.Rd @@ -4,7 +4,7 @@ \alias{OHLC} \title{OHLC Estimators} \usage{ -OHLC(x, width = nrow(x), method, sign, na.rm) +OHLC(open, high, low, close, width, method, sign, na.rm) } \description{ OHLC Estimators diff --git a/r/man/ROLL.Rd b/r/man/ROLL.Rd index 87a4050..ad76ab1 100644 --- a/r/man/ROLL.Rd +++ b/r/man/ROLL.Rd @@ -4,7 +4,7 @@ \alias{ROLL} \title{Roll Estimator} \usage{ -ROLL(x, width = nrow(x), sign, na.rm) +ROLL(close, width, sign, na.rm) } \description{ Roll Estimator diff --git a/r/man/bidask-package.Rd b/r/man/bidask-package.Rd index 6933818..be8c644 100644 --- a/r/man/bidask-package.Rd +++ b/r/man/bidask-package.Rd @@ -6,7 +6,7 @@ \alias{bidask-package} \title{bidask: Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices} \description{ -Implements an efficient estimator of bid-ask spreads from open, high, low, and close prices as described in Ardia, Guidotti, & Kroencke (2024) \doi{10.1016/j.jfineco.2024.103916}. It also provides an implementation of the estimators described in Roll (1984) \doi{10.1111/j.1540-6261.1984.tb03897.x}, Corwin & Schultz (2012) \doi{10.1111/j.1540-6261.2012.01729.x}, and Abdi & Ranaldo (2017) \doi{10.1093/rfs/hhx084}. +Implements the efficient estimator of bid-ask spreads from open, high, low, and close prices described in Ardia, Guidotti, & Kroencke (JFE, 2024) \doi{10.1016/j.jfineco.2024.103916}. It also provides an implementation of the estimators described in Roll (JF, 1984) \doi{10.1111/j.1540-6261.1984.tb03897.x}, Corwin & Schultz (JF, 2012) \doi{10.1111/j.1540-6261.2012.01729.x}, and Abdi & Ranaldo (RFS, 2017) \doi{10.1093/rfs/hhx084}. } \seealso{ Useful links: diff --git a/r/man/edge_expanding.Rd b/r/man/edge_expanding.Rd new file mode 100644 index 0000000..e0f734e --- /dev/null +++ b/r/man/edge_expanding.Rd @@ -0,0 +1,50 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/edge.R +\name{edge_expanding} +\alias{edge_expanding} +\title{Expanding Estimates of Bid-Ask Spreads from Open, High, Low, and Close Prices} +\usage{ +edge_expanding(open, high, low, close, sign = FALSE, na.rm = TRUE) +} +\arguments{ +\item{open}{numeric vector of open prices.} + +\item{high}{numeric vector of high prices.} + +\item{low}{numeric vector of low prices.} + +\item{close}{numeric vector of close prices.} + +\item{sign}{whether to return signed estimates.} + +\item{na.rm}{whether to ignore missing values.} +} +\value{ +Vector of spread estimates. +A value of 0.01 corresponds to a spread of 1\%. +This function always returns a result of the same length as the input prices. +} +\description{ +Implements an expanding window calculation of the efficient estimator of bid-ask spreads +from open, high, low, and close prices described in Ardia, Guidotti, & Kroencke (JFE, 2024): +\doi{10.1016/j.jfineco.2024.103916}. +} +\details{ +Prices must be sorted in ascending order of the timestamp. +} +\examples{ +# simulate open, high, low, and close prices with spread 1\% +x <- sim(spread = 0.01) + +# estimate the spread using an expanding window +s <- edge_expanding(x$Open, x$High, x$Low, x$Close) +tail(s) +# equivalent to +s <- edge_rolling(x$Open, x$High, x$Low, x$Close, width = 1:nrow(x), na.rm = TRUE) +tail(s) + +} +\references{ +Ardia, D., Guidotti, E., Kroencke, T.A. (2024). Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices. Journal of Financial Economics, 161, 103916. +\doi{10.1016/j.jfineco.2024.103916} +} diff --git a/r/man/edge_rolling.Rd b/r/man/edge_rolling.Rd new file mode 100644 index 0000000..5c3c7a5 --- /dev/null +++ b/r/man/edge_rolling.Rd @@ -0,0 +1,64 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/edge.R +\name{edge_rolling} +\alias{edge_rolling} +\title{Rolling Estimates of Bid-Ask Spreads from Open, High, Low, and Close Prices} +\usage{ +edge_rolling(open, high, low, close, width, sign = FALSE, na.rm = FALSE) +} +\arguments{ +\item{open}{numeric vector of open prices.} + +\item{high}{numeric vector of high prices.} + +\item{low}{numeric vector of low prices.} + +\item{close}{numeric vector of close prices.} + +\item{width}{if an integer, the width of the rolling window. If a vector with the same length of the input prices, the width of the window corresponding to each observation. Otherwise, a vector of endpoints. See examples.} + +\item{sign}{whether to return signed estimates.} + +\item{na.rm}{whether to ignore missing values.} +} +\value{ +Vector of spread estimates. +A value of 0.01 corresponds to a spread of 1\%. +This function always returns a result of the same length as the input prices. +} +\description{ +Implements a rolling window calculation of the efficient estimator of bid-ask spreads +from open, high, low, and close prices described in Ardia, Guidotti, & Kroencke (JFE, 2024): +\doi{10.1016/j.jfineco.2024.103916}. +} +\details{ +Prices must be sorted in ascending order of the timestamp. +} +\examples{ +# simulate open, high, low, and close prices with spread 1\% +x <- sim(spread = 0.01) + +# estimate the spread using a rolling window +s <- edge_rolling(x$Open, x$High, x$Low, x$Close, width = 21) +tail(s) + +# estimate the spread using custom endpoints +ep <- c(3, 35, 100) +s <- edge_rolling(x$Open, x$High, x$Low, x$Close, width = ep) +s[c(35, 100)] +# equivalent to +edge(x$Open[3:35], x$High[3:35], x$Low[3:35], x$Close[3:35]) +edge(x$Open[35:100], x$High[35:100], x$Low[35:100], x$Close[35:100]) + +# estimate the spread using an expanding window +s <- edge_rolling(x$Open, x$High, x$Low, x$Close, width = 1:nrow(x)) +tail(s) +# equivalent to +s <- edge_expanding(x$Open, x$High, x$Low, x$Close, na.rm = FALSE) +tail(s) + +} +\references{ +Ardia, D., Guidotti, E., Kroencke, T.A. (2024). Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices. Journal of Financial Economics, 161, 103916. +\doi{10.1016/j.jfineco.2024.103916} +} diff --git a/r/man/rfun.Rd b/r/man/rfun.Rd new file mode 100644 index 0000000..6186fa2 --- /dev/null +++ b/r/man/rfun.Rd @@ -0,0 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{rfun} +\alias{rfun} +\title{Rolling function} +\usage{ +rfun(froll, x, width, shift, na.rm) +} +\description{ +Rolling function +} +\keyword{internal} diff --git a/r/man/rmean.Rd b/r/man/rmean.Rd index ca16edf..3308b57 100644 --- a/r/man/rmean.Rd +++ b/r/man/rmean.Rd @@ -4,7 +4,7 @@ \alias{rmean} \title{Rolling mean} \usage{ -rmean(x, width, na.rm) +rmean(x, width, shift, na.rm) } \description{ Rolling mean diff --git a/r/man/rsum.Rd b/r/man/rsum.Rd index 72067cd..8b736fa 100644 --- a/r/man/rsum.Rd +++ b/r/man/rsum.Rd @@ -2,11 +2,11 @@ % Please edit documentation in R/utils.R \name{rsum} \alias{rsum} -\title{Rolling sum} +\title{#' Rolling sum} \usage{ -rsum(x, width, na.rm) +rsum(x, width, shift, na.rm) } \description{ -Rolling sum +#' Rolling sum } \keyword{internal} diff --git a/r/man/sim.Rd b/r/man/sim.Rd index 60aeac9..99226b6 100644 --- a/r/man/sim.Rd +++ b/r/man/sim.Rd @@ -12,7 +12,7 @@ sim( volatility = 0.03, overnight = 0, drift = 0, - units = "day", + units = 1, sign = FALSE ) } @@ -31,15 +31,16 @@ sim( \item{drift}{the expected return per period.} -\item{units}{the units of the time period. One of: \code{sec}, \code{min}, \code{hour}, \code{day}, \code{week}, \code{month}, \code{year}.} +\item{units}{the units of the time period. One of: \code{1}, \code{sec}, \code{min}, \code{hour}, \code{day}, \code{week}, \code{month}, \code{year}.} \item{sign}{whether to return positive prices for buys and negative prices for sells.} } \value{ -Simulated open, high, low, and close prices. +A data.frame of open, high, low, and close prices if \code{units=1} (default). +Otherwise, an \code{xts} object is returned (requires the \code{xts} package to be installed). } \description{ -This function performs simulations consisting of \code{n} periods (e.g., days) and where each period consists of a given number of \code{trades}. +This function performs simulations consisting of \code{n} periods and where each period consists of a given number of \code{trades}. For each trade, the actual price \eqn{P_t} is simulated as \eqn{P_t = P_{t-1}e^{\sigma x}}, where \eqn{\sigma} is the standard deviation per trade and \eqn{x} is a random draw from a unit normal distribution. The standard deviation per trade equals the \code{volatility} divided by the square root of the number of \code{trades}. Trades are assumed to be observed with a given \code{probability}. @@ -48,9 +49,10 @@ High and low prices equal the highest and lowest prices observed during the peri Open and Close prices equal the first and the last price observed in the period. If no trade is observed for a period, then the previous Close is used as the Open, High, Low, and Close prices for that period. } -\note{ -Please cite Ardia, Guidotti, & Kroencke (2024) -when using this package in publication. +\examples{ +# simulate 10 open, high, low, and close prices with spread 1\% +sim(n = 10, spread = 0.01) + } \references{ Ardia, D., Guidotti, E., Kroencke, T.A. (2024). Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices. Journal of Financial Economics, 161, 103916. diff --git a/r/man/spread.Rd b/r/man/spread.Rd index e90929a..f2240f1 100644 --- a/r/man/spread.Rd +++ b/r/man/spread.Rd @@ -7,34 +7,36 @@ spread(x, width = nrow(x), method = "EDGE", sign = FALSE, na.rm = FALSE) } \arguments{ -\item{x}{\code{\link[xts]{xts}} object with columns named \code{Open}, \code{High}, \code{Low}, \code{Close}.} +\item{x}{tabular data with columns named \code{open}, \code{high}, \code{low}, \code{close} (case-insensitive).} -\item{width}{integer width of the rolling window to use, or vector of endpoints defining the intervals to use. By default, the whole time series is used to compute a single spread estimate.} +\item{width}{if an integer, the width of the rolling window. If a vector with the same length of the input prices, the width of the window corresponding to each observation. Otherwise, a vector of endpoints. By default, the full sample is used to compute a single spread estimate. See examples.} -\item{method}{the estimator(s) to use. See details.} +\item{method}{the estimators to use. See details.} \item{sign}{whether to return signed estimates.} \item{na.rm}{whether to ignore missing values.} } \value{ -Time series of spread estimates. A value of 0.01 corresponds to a spread of 1\%. +A data.frame of spread estimates, or an \code{xts} object if \code{x} is of class \code{xts}. +A value of 0.01 corresponds to a spread of 1\%. } \description{ This function implements several methods to estimate bid-ask spreads -from open, high, low, and close prices. +from open, high, low, and close prices and it is optimized for fast +calculations over rolling and expanding windows. } \details{ -The method \code{EDGE} implements the Efficient Discrete Generalized Estimator described in Ardia, Guidotti, & Kroencke (2024). +The method \code{EDGE} implements the Efficient Discrete Generalized Estimator described in Ardia, Guidotti, & Kroencke (JFE, 2024). -The methods \code{OHL}, \code{OHLC}, \code{CHL}, \code{CHLO} implement the generalized estimators described in Ardia, Guidotti, & Kroencke (2024). +The methods \code{OHL}, \code{OHLC}, \code{CHL}, \code{CHLO} implement the generalized estimators described in Ardia, Guidotti, & Kroencke (JFE, 2024). They can be combined by concatenating their identifiers, e.g., \code{OHLC.CHLO} uses an average of the \code{OHLC} and \code{CHLO} estimators. -The method \code{AR} implements the estimator described in Abdi & Ranaldo (2017). \code{AR2} implements their 2-period version. +The method \code{AR} implements the estimator described in Abdi & Ranaldo (RFS, 2017). \code{AR2} implements their 2-period version. -The method \code{CS} implements the estimator described in Corwin & Schultz (2012). \code{CS2} implements their 2-period version. Both versions are adjusted for overnight (close-to-open) returns as described in the paper. +The method \code{CS} implements the estimator described in Corwin & Schultz (JF, 2012). \code{CS2} implements their 2-period version. Both versions are adjusted for overnight (close-to-open) returns as described in the paper. -The method \code{ROLL} implements the estimator described in Roll (1984). +The method \code{ROLL} implements the estimator described in Roll (JF, 1984). } \examples{ # simulate open, high, low, and close prices with spread 1\% @@ -42,16 +44,29 @@ x <- sim(spread = 0.01) # estimate the spread spread(x) - -# by default this is equivalent to +# equivalent to edge(x$Open, x$High, x$Low, x$Close) # estimate the spread using a rolling window of 21 periods -spread(x, width = 21) - -# estimate the spread for each month -ep <- xts::endpoints(x, on = "months") +s <- spread(x, width = 21) +tail(s) +# equivalent to +s <- edge_rolling(x$Open, x$High, x$Low, x$Close, width = 21) +tail(s) + +# estimate the spread using an expanding window +s <- spread(x, width = 1:nrow(x)) +tail(s) +# equivalent to +s <- edge_expanding(x$Open, x$High, x$Low, x$Close, na.rm = FALSE) +tail(s) + +# estimate the spread using custom endpoints +ep <- c(3, 35, 100) spread(x, width = ep) +# equivalent to +edge(x$Open[3:35], x$High[3:35], x$Low[3:35], x$Close[3:35]) +edge(x$Open[35:100], x$High[35:100], x$Low[35:100], x$Close[35:100]) # use multiple estimators spread(x, method = c("EDGE", "AR", "CS", "ROLL", "OHLC", "OHL.CHL")) diff --git a/r/tests/testthat/test-edge.R b/r/tests/testthat/test-edge.R index 7d9c489..aab29ad 100644 --- a/r/tests/testthat/test-edge.R +++ b/r/tests/testthat/test-edge.R @@ -1,7 +1,36 @@ test_that("edge", { + x <- read.csv("https://raw.githubusercontent.com/eguidotti/bidask/main/pseudocode/ohlc.csv") + s <- edge(x$Open, x$High, x$Low, x$Close) + + expect_equal(s, 0.0101849034905478) + +}) + +test_that("edge-miss", { + + x <- read.csv("https://raw.githubusercontent.com/eguidotti/bidask/main/pseudocode/ohlc-miss.csv") + s <- edge(x$Open, x$High, x$Low, x$Close) + + expect_equal(s, 0.01013284969780197) + +}) + +test_that("edge-na", { + + expect_true(is.na(edge( + c(18.21, 17.61, 17.61), + c(18.21, 17.61, 17.61), + c(17.61, 17.61, 17.61), + c(17.61, 17.61, 17.61) + ))) + +}) + +test_that("edge-spread", { + set.seed(123) - x <- sim(prob = 0.01) + x <- sim(prob = 0.01, units = "day") s1 <- as.numeric(spread(x, method = "EDGE")) s2 <- edge(x$Open, x$High, x$Low, x$Close) @@ -10,10 +39,10 @@ test_that("edge", { }) -test_that("edge-monthly", { +test_that("edge-spread-monthly", { set.seed(123) - x <- sim(prob = 0.01) + x <- sim(prob = 0.01, units = "day") zoo::index(x) <- zoo::index(x) - as.integer(start(x)) width <- xts::endpoints(x, on = "months") @@ -28,10 +57,10 @@ test_that("edge-monthly", { }) -test_that("edge-rolling", { +test_that("edge-spread-rolling", { set.seed(123) - x <- sim(prob = 0.01) + x <- sim(prob = 0.01, units = "day") for(width in c(1, 2, 3, 4, 21, 100)){ @@ -46,10 +75,10 @@ test_that("edge-rolling", { }) -test_that("edge-sign", { +test_that("edge-spread-sign", { set.seed(123) - x <- sim(prob = 0.01) + x <- sim(prob = 0.01, units = "day") width <- 21 s1 <- spread(x, width = width, method = "EDGE", sign = TRUE) @@ -61,13 +90,77 @@ test_that("edge-sign", { }) -test_that("edge-nan", { +test_that("edge-rolling", { - expect_true(is.nan(edge( - c(18.21, 17.61, 17.61), - c(18.21, 17.61, 17.61), - c(17.61, 17.61, 17.61), - c(17.61, 17.61, 17.61) - ))) + set.seed(123) + for(units in c(1, "day")) for(sign in c(TRUE, FALSE)) for(width in c(2, 3, 21)){ + + x <- sim(prob = 0.01, units = units) + + s1 <- spread(x, width = width, method = "EDGE", sign = sign) + s2 <- edge_rolling(x$Open, x$High, x$Low, x$Close, width = width, sign = sign) + + if(is.data.frame(x)) + idx <- as.integer(rownames(s1)) + else + idx <- which(zoo::index(x) %in% zoo::index(s1)) + + expect_equal(length(s2), nrow(x)) + expect_equal(as.numeric(s1[,1]), s2[idx]) + + } + +}) + +test_that("edge-expanding", { + set.seed(123) + for(units in c(1, "day")) for(sign in c(TRUE, FALSE)) { + + x <- sim(prob = 0.01, units = units) + + s1 <- spread(x, width = 1:nrow(x), method = "EDGE", sign = sign) + s2 <- edge_expanding(x$Open, x$High, x$Low, x$Close, sign = sign) + + if(is.data.frame(x)) + idx <- as.integer(rownames(s1)) + else + idx <- which(zoo::index(x) %in% zoo::index(s1)) + + expect_equal(length(s2), nrow(x)) + expect_equal(as.numeric(s1[,1]), s2[idx]) + + } + +}) + +test_that("spread", { + + set.seed(123) + x <- sim(prob = 0.01) + + s <- spread(x[, c("Open", "High", "Low", "Close")], method = "EDGE") + expect_equal(as.numeric(s), 0.011211623772355) + + s <- spread(x[, c("Open", "High", "Low", "Close")], method = "OHLC") + expect_equal(as.numeric(s), 0.0111885179011119) + + s <- spread(x[, c("Open", "High", "Low", "Close")], method = "CHLO") + expect_equal(as.numeric(s), 0.0109352942009762) + + s <- spread(x[, c("Open", "High", "Low")], method = "OHL", na.rm = TRUE) + expect_equal(as.numeric(s), 0.0109503006263557) + + s <- spread(x[, c("High", "Low", "Close")], method = "CHL") + expect_equal(as.numeric(s), 0.0113136390567206) + + s <- spread(x[, c("High", "Low", "Close")], method = "AR") + expect_equal(as.numeric(s), 0.00874585212811397) + + s <- spread(x[, c("High", "Low", "Close")], method = "CS") + expect_equal(as.numeric(s), 0.00273953769016127) + + s <- spread(x[, "Close", drop = FALSE], method = "ROLL") + expect_equal(as.numeric(s), 0.0125430188215437) + }) diff --git a/r/vignettes/bidask.Rmd b/r/vignettes/bidask.Rmd index 3881a25..373a520 100644 --- a/r/vignettes/bidask.Rmd +++ b/r/vignettes/bidask.Rmd @@ -12,37 +12,26 @@ knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width = 6, + out.width="100%", + dpi = 300, warning = FALSE, message = FALSE ) ``` -This vignette illustrates how to estimate bid-ask spreads from open, high, low, and close prices. Let's start by loading the package: +This vignette illustrates how to estimate bid-ask spreads from open, high, low, and close prices using the efficient estimator described in Ardia, Guidotti, & Kroencke (JFE, 2024): [https://doi.org/10.1016/j.jfineco.2024.103916](https://doi.org/10.1016/j.jfineco.2024.103916). ```{r setup} library(bidask) ``` -The package offers two ways to estimate bid-ask spreads: +The function `edge` computes a single bid-ask spread estimate from vectors of open, high, low, and close prices. The functions `edge_rolling` and `edge_expanding` are optimized for fast calculations over rolling and expanding windows, respectively. The function `spread` provides additional functionalities for `xts` objects and implements additional estimators. For all functions, an output value of 0.01 corresponds to a spread estimate of 1%. -1. `edge()`: designed for tidy data. -2. `spread()`: designed for `xts` objects. +## Functions `edge`, `edge_rolling`, `edge_expanding` -The function `edge()` implements the efficient estimator described in [Ardia, Guidotti, & Kroencke (2024)](https://doi.org/10.1016/j.jfineco.2024.103916). Open, high, low, and close prices are to be passed as separate vectors. +These functions can be easily used with tidy data. For instance, download daily prices for Bitcoin and Ethereum using the [crypto2](https://cran.r-project.org/package=crypto2) package: -The function `spread()` requires an `xts` object containing columns named `Open`, `High`, `Low`, `Close` and it provides additional functionalities, such as additional estimators and rolling estimates. - -An output value of 0.01 corresponds to a spread estimate of 1%. - -Examples are provided below. - -## Tidy data - -The function `edge()` can be easily used with tidy data and the `dplyr` grammar. In the following example, we estimate bid-ask spreads for cryptocurrencies. - -Download daily prices for Bitcoin and Ethereum using the [crypto2](https://cran.r-project.org/package=crypto2) package: - -```{r} +```{r, results='hide'} library(dplyr) library(crypto2) df <- crypto_list(only_active=TRUE) %>% @@ -61,53 +50,73 @@ df %>% mutate(yyyy = format(timestamp, "%Y")) %>% group_by(symbol, yyyy) %>% arrange(timestamp) %>% - summarise(EDGE = edge(open, high, low, close)) + summarise("EDGE" = edge(open, high, low, close)) ``` -## `xts` objects - -The function `spread()` provides additional functionalities for [xts](https://cran.r-project.org/package=xts) objects. In the following example, we estimate bid-ask spreads for equities. +Estimate the spread using a rolling window of 30 days for each coin and plot the results: -Download daily data for Microsoft (MSFT) using the [quantmod](https://cran.r-project.org/package=quantmod) package: +```{r} +library(ggplot2) +df %>% + group_by(symbol) %>% + arrange(timestamp) %>% + mutate("EDGE (rolling)" = edge_rolling(open, high, low, close, width = 30)) %>% + ggplot(aes(x = timestamp, y = `EDGE (rolling)`, color = symbol)) + + geom_line() + + theme_minimal() +``` +Estimate the spread using an expanding window for each coin and plot the results: ```{r} -library(quantmod) -x <- getSymbols("MSFT", auto.assign = FALSE, start = "2019-01-01", end = "2022-12-31") -head(x) +df %>% + group_by(symbol) %>% + arrange(timestamp) %>% + mutate("EDGE (expanding)" = edge_expanding(open, high, low, close)) %>% + ggplot(aes(x = timestamp, y = `EDGE (expanding)`, color = symbol)) + + geom_line() + + theme_minimal() ``` -This is an `xts` object: +Notice that, generally, using intraday data (instead of daily) improves the estimation accuracy, especially when the spread is expected to be small (see example below). + +## Function `spread` + +The function `spread()` provides additional functionalities for [xts](https://cran.r-project.org/package=xts) objects and implements additional estimators. For instance, download daily data for Microsoft (MSFT) using the [quantmod](https://cran.r-project.org/package=quantmod) package which returns an `xts` object: ```{r} +library(quantmod) +x <- getSymbols("MSFT", auto.assign = FALSE, start = "2019-01-01", end = "2022-12-31") +head(x) class(x) ``` -So we can estimate the spread with: + +Estimate the spread with: ```{r} spread(x) ``` -By default, the call above is equivalent to: +or, equivalently: ```{r} edge(open = x[,1], high = x[,2], low = x[,3], close = x[,4]) ``` -But `spread()` also provides additional functionalities. For instance, estimate the spread for each month and plot the estimates: +Estimate the spread for each month and plot the estimates: ```{r} sp <- spread(x, width = endpoints(x, on = "months")) plot(sp) ``` -Or estimate the spread using a rolling window of 21 obervations: +Estimate the spread using a rolling window of 21 obervations: ```{r} sp <- spread(x, width = 21) plot(sp) ``` -To illustrate higher-frequency estimates, we are going to download intraday data from Alpha Vantage. You must register with Alpha Vantage in order to download their data, but the one-time registration is fast and free. Register at https://www.alphavantage.co/ to receive your key. You can set the API key globally as follows: +To illustrate higher-frequency estimates, download intraday data from Alpha Vantage. You must register with Alpha Vantage in order to download their data, but the one-time registration is fast and free. Register at https://www.alphavantage.co/ to receive your key. You can set the API key globally as follows: ```{r} setDefaults(getSymbols.av, api.key = "") @@ -130,7 +139,10 @@ x <- read.csv(system.file("extdata", "msft.csv", package = "bidask")) x <- xts(x[,-1], order.by = as.POSIXct(x[,1])) ``` +Keep only prices during regular market hours: + ```{r} +x <- x["T09:30/T16:00"] head(x) ``` @@ -138,14 +150,19 @@ Estimate the spread for each day and plot the estimates: ```{r} sp <- spread(x, width = endpoints(x, on = "day")) -plot(sp) +plot(sp, type = "b") ``` -## GitHub +Use multiple estimators and plot the estimates: -If you find this package useful, please [star the repo](https://github.com/eguidotti/bidask)! +```{r} +sp <- spread(x, width = endpoints(x, on = "day"), method = c("EDGE", "AR", "CS", "ROLL")) +plot(sp, type = "b", legend.loc = "topright") +``` + +## GitHub -The repository also contains implementations for Python, C++, MATLAB, and more. +If you find this package useful, please [star the repo](https://github.com/eguidotti/bidask)! The repository also contains implementations for Python, C++, MATLAB, and more; as well as open data containing bid-ask spread estimates for crypto pairs in Binance and for U.S. stocks in CRSP. ## Cite as