diff --git a/r/DESCRIPTION b/r/DESCRIPTION index 1ad2948..be9f975 100644 --- a/r/DESCRIPTION +++ b/r/DESCRIPTION @@ -1,7 +1,7 @@ Package: bidask Type: Package Title: Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices -Version: 2.0.6 +Version: 2.1.0 Authors@R: c( person(given = "Emanuele", family = "Guidotti", email = "emanuele.guidotti@usi.ch", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-8961-6623")), person(given = "David", family = "Ardia", role = c("ctb"), comment = c(ORCID = "0000-0003-2823-782X")), diff --git a/r/R/edge.R b/r/R/edge.R index bf9b797..6c6d49a 100644 --- a/r/R/edge.R +++ b/r/R/edge.R @@ -4,105 +4,151 @@ #' EDGE <- function(x, width = nrow(x), sign, na.rm){ + # compute log-prices x <- log(x) + o <- x$OPEN + h <- x$HIGH + l <- x$LOW + c <- x$CLOSE + m <- (h + l) / 2 + + # shift log-prices by one period + h1 <- lag(h, 1) + l1 <- lag(l, 1) + c1 <- lag(c, 1) + m1 <- lag(m, 1) - O <- x$OPEN - H <- x$HIGH - L <- x$LOW - C <- x$CLOSE - M <- (H+L)/2 - - H1 <- lag(H, 1) - L1 <- lag(L, 1) - C1 <- lag(C, 1) - M1 <- lag(M, 1) - - r1 <- M-O - r2 <- O-M1 - r3 <- M-C1 - r4 <- C1-M1 - r5 <- O-C1 - - tau <- H!=L | L!=C1 - phi1 <- O!=H & tau - phi2 <- O!=L & tau - phi3 <- C1!=H1 & tau - phi4 <- C1!=L1 & tau + # compute log-returns + r1 <- m - o + r2 <- o - m1 + r3 <- m - c1 + r4 <- c1 - m1 + r5 <- o - c1 + + # compute indicator variables + tau <- h != l | l != c1 + po1 <- tau & o != h + po2 <- tau & o != l + pc1 <- tau & c1 != h1 + pc2 <- tau & c1 != l1 + # compute base products for rolling means + r12 <- r1 * r2 + r15 <- r1 * r5 + r34 <- r3 * r4 + r45 <- r4 * r5 + tr1 <- tau * r1 + tr2 <- tau * r2 + tr4 <- tau * r4 + tr5 <- tau * r5 + + # set up data frame for rolling means x <- cbind( - r1*r2, # 1 - r3*r4, # 2 - r1*r5, # 3 - r5*r4, # 4 - tau, # 5 - r1, # 6 - tau*r2, # 7 - r3, # 8 - tau*r4, # 9 - r5, # 10 - r1^2*r2^2, # 11 - r3^2*r4^2, # 12 - r1^2*r5^2, # 13 - r4^2*r5^2, # 14 - r1*r2*r3*r4, # 15 - r1*r4*r5^2, # 16 - tau*r2^2, # 17 - tau*r4^2, # 18 - tau*r5^2, # 19 - tau*r1*r2^2, # 20 - tau*r3*r4^2, # 21 - tau*r1*r5^2, # 22 - tau*r5*r4^2, # 23 - tau*r1*r2*r4, # 24 - tau*r2*r3*r4, # 25 - tau*r2*r4, # 26 - tau*r1*r4*r5, # 27 - tau*r4*r5^2, # 28 - tau*r4*r5, # 29 - tau*r5, # 30 - phi1, # 31 - phi2, # 32 - phi3, # 33 - phi4 # 34 + r12, + r34, + r15, + r45, + tau, + r1, + tr2, + r3, + tr4, + r5, + r12^2, + r34^2, + r15^2, + r45^2, + r12 * r34, + r15 * r45, + tr2 * r2, + tr4 * r4, + tr5 * r5, + tr2 * r12, + tr4 * r34, + tr5 * r15, + tr4 * r45, + tr4 * r12, + tr2 * r34, + tr2 * r4, + tr1 * r45, + tr5 * r45, + tr4 * r5, + tr5, + po1, + po2, + pc1, + pc2 ) - m <- rmean(x[-1], width = width-1, na.rm = na.rm) + # mask the first observation and decrement width by 1 before + # computing rolling means to account for lagged prices + x <- x[-1] + width = width - 1 - po <- -8 / (m[,31] + m[,32]) - pc <- -8 / (m[,33] + m[,34]) + # compute rolling means + m <- rmean(x, width = width, na.rm = na.rm) - e1 <- po/2 * (m[,1] - m[,6]*m[,7]/m[,5]) + - pc/2 * (m[,2] - m[,8]*m[,9]/m[,5]) + # compute probabilities + pt <- m[,5] + po <- m[,31] + m[,32] + pc <- m[,33] + m[,34] - e2 <- po/2 * (m[,3] - m[,6]*m[,30]/m[,5]) + - pc/2 * (m[,4] - m[,10]*m[,9]/m[,5]) + # set to missing if there are less than two periods with tau=1 + # or po or pc is zero + nt <- rsum(x[,5], width = width, na.rm = TRUE) + m[nt < 2 | (!is.nan(po) & po == 0) | (!is.nan(pc) & pc == 0)] <- NaN - v1 <- po^2/4 * (m[,11] + m[,6]^2*m[,17]/m[,5]^2 - 2*m[,20]*m[,6]/m[,5]) + - pc^2/4 * (m[,12] + m[,8]^2*m[,18]/m[,5]^2 - 2*m[,21]*m[,8]/m[,5]) + - po*pc/2 * (m[,15] - m[,24]*m[,8]/m[,5] - m[,6]*m[,25]/m[,5] + m[,6]*m[,8]*m[,26]/m[,5]^2) - - e1^2 + # compute input vectors + a1 <- -4. / po + a2 <- -4. / pc + a3 <- m[,6] / pt + a4 <- m[,9] / pt + a5 <- m[,8] / pt + a6 <- m[,10] / pt + a12 <- 2 * a1 * a2 + a11 <- a1^2 + a22 <- a2^2 + a33 <- a3^2 + a55 <- a5^2 + a66 <- a6^2 - v2 <- po^2/4 * (m[,13] + m[,6]^2*m[,19]/m[,5]^2 - 2*m[,22]*m[,6]/m[,5]) + - pc^2/4 * (m[,14] + m[,10]^2*m[,18]/m[,5]^2 - 2*m[,23]*m[,10]/m[,5]) + - po*pc/2 * (m[,16] - m[,27]*m[,10]/m[,5] - m[,6]*m[,28]/m[,5] + m[,6]*m[,10]*m[,29]/m[,5]^2) - - e2^2 + # compute expectations + e1 <- a1 * (m[,1] - a3*m[,7]) + a2 * (m[,2] - a4*m[,8]) + e2 <- a1 * (m[,3] - a3*m[,30]) + a2 * (m[,4] - a4*m[,10]) - S2 <- (v2*e1 + v1*e2) / (v1 + v2) - S2[is.infinite(S2)] <- NaN - colnames(S2) <- "EDGE" - - S <- sign(S2) * sqrt(abs(S2)) - if(!sign) S <- abs(S) + # compute variances + v1 <- - e1^2 + ( + a11 * (m[,11] - 2*a3*m[,20] + a33*m[,17]) + + a22 * (m[,12] - 2*a5*m[,21] + a55*m[,18]) + + a12 * (m[,15] - a3*m[,25] - a5*m[,24] + a3*a5*m[,26]) + ) + v2 <- - e2^2 + ( + a11 * (m[,13] - 2*a3*m[,22] + a33*m[,19]) + + a22 * (m[,14] - 2*a6*m[,23] + a66*m[,18]) + + a12 * (m[,16] - a3*m[,28] - a6*m[,27] + a3*a6*m[,29]) + ) - return(S) + # compute square spread by using a (equally) weighted + # average if the total variance is (not) positive + vt <- v1 + v2 + s2 <- ifelse(!is.na(vt) & vt > 0, (v2*e1 + v1*e2) / vt, (e1 + e2) / 2.) + colnames(s2) <- "EDGE" + + # compute signed root + s <- sqrt(abs(s2)) + if(sign) + s <- s * base::sign(s2) + + # return the spread + return(s) } #' Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices #' -#' Implements an efficient estimator of bid-ask spreads -#' from open, high, low, and close prices as described in -#' Ardia, Guidotti, & Kroencke (2024). +#' Implements the efficient estimator of bid-ask spreads from open, high, low, +#' and close prices described in Ardia, Guidotti, & Kroencke (JFE, 2024): +#' \doi{10.1016/j.jfineco.2024.103916} #' #' @details #' Prices must be sorted in ascending order of the timestamp. @@ -111,14 +157,10 @@ EDGE <- function(x, width = nrow(x), sign, na.rm){ #' @param high numeric vector of high prices. #' @param low numeric vector of low prices. #' @param close numeric vector of close prices. -#' @param sign whether signed estimates should be returned. +#' @param sign whether to return signed estimates. #' #' @return The spread estimate. A value of 0.01 corresponds to a spread of 1\%. #' -#' @note -#' Please cite Ardia, Guidotti, & Kroencke (2024) -#' when using this package in publication. -#' #' @references #' Ardia, D., Guidotti, E., Kroencke, T.A. (2024). Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices. Journal of Financial Economics, 161, 103916. #' \doi{10.1016/j.jfineco.2024.103916} @@ -134,62 +176,82 @@ EDGE <- function(x, width = nrow(x), sign, na.rm){ #' edge <- function(open, high, low, close, sign = FALSE){ + # check that the open, high, low, and close prices have the same length n <- length(open) if(length(high) != n | length(low) != n | length(close) != n) stop("open, high, low, close must have the same length") + # return missing if there are less than 3 observations + if(n < 3) + return(NaN) + + # compute log-prices o <- log(as.numeric(open)) h <- log(as.numeric(high)) l <- log(as.numeric(low)) c <- log(as.numeric(close)) m <- (h + l) / 2 - h1 <- h[-n] - l1 <- l[-n] - c1 <- c[-n] - m1 <- m[-n] + # shift log-prices by one period + h1 <- h[-n]; l1 <- l[-n]; c1 <- c[-n]; m1 <- m[-n] + o <- o[-1]; h <- h[-1]; l <- l[-1]; c <- c[-1]; m <- m[-1] - o <- o[-1] - h <- h[-1] - l <- l[-1] - c <- c[-1] - m <- m[-1] + # compute log-returns + r1 <- m - o + r2 <- o - m1 + r3 <- m - c1 + r4 <- c1 - m1 + r5 <- o - c1 - tau <- h!=l | l!=c1 - phi1 <- o!=h & tau - phi2 <- o!=l & tau - phi3 <- c1!=h1 & tau - phi4 <- c1!=l1 & tau + # compute indicator variables + tau <- h != l | l != c1 + po1 <- tau & o != h + po2 <- tau & o != l + pc1 <- tau & c1 != h1 + pc2 <- tau & c1 != l1 + # compute probabilities pt <- mean(tau, na.rm = TRUE) - po <- mean(phi1, na.rm = TRUE) + mean(phi2, na.rm = TRUE) - pc <- mean(phi3, na.rm = TRUE) + mean(phi4, na.rm = TRUE) + po <- mean(po1, na.rm = TRUE) + mean(po2, na.rm = TRUE) + pc <- mean(pc1, na.rm = TRUE) + mean(pc2, na.rm = TRUE) - r1 <- m-o - r2 <- o-m1 - r3 <- m-c1 - r4 <- c1-m1 - r5 <- o-c1 + # return missing if there are less than two periods with tau=1 + # or po or pc is zero + nt <- sum(tau, na.rm = TRUE) + if(nt < 2 | (!is.nan(po) & po == 0) | (!is.nan(pc) & pc == 0)) + return(NaN) - d1 <- r1 - tau * mean(r1, na.rm = TRUE) / pt - d3 <- r3 - tau * mean(r3, na.rm = TRUE) / pt - d5 <- r5 - tau * mean(r5, na.rm = TRUE) / pt + # compute de-meaned log-returns + d1 <- r1 - mean(r1, na.rm = TRUE)/pt*tau + d3 <- r3 - mean(r3, na.rm = TRUE)/pt*tau + d5 <- r5 - mean(r5, na.rm = TRUE)/pt*tau - x1 <- -4/po*d1*r2 -4/pc*d3*r4 - x2 <- -4/po*d1*r5 -4/pc*d5*r4 + # compute input vectors + x1 <- -4./po*d1*r2 + -4./pc*d3*r4 + x2 <- -4./po*d1*r5 + -4./pc*d5*r4 - e1 <- mean(x1, na.rm = TRUE) - e2 <- mean(x2, na.rm = TRUE) + # compute expectations + e1 <- mean(x1, na.rm = TRUE) + e2 <- mean(x2, na.rm = TRUE) + # compute variances v1 <- mean(x1^2, na.rm = TRUE) - e1^2 v2 <- mean(x2^2, na.rm = TRUE) - e2^2 - s2 <- (v2*e1 + v1*e2) / (v1 + v2) + # compute square spread by using a (equally) weighted + # average if the total variance is (not) positive + vt = v1 + v2 + if(!is.na(vt) & vt > 0) + s2 = (v2*e1 + v1*e2) / vt + else + s2 = (e1 + e2) / 2. + # compute signed root s <- sqrt(abs(s2)) - if(sign & !is.na(s2) & s2 < 0) - s <- -s + if(sign) + s <- s * base::sign(s2) + # return the spread return(s) } diff --git a/r/R/spread.R b/r/R/spread.R index 7d17d6b..e203ab0 100644 --- a/r/R/spread.R +++ b/r/R/spread.R @@ -18,15 +18,11 @@ #' @param x \code{\link[xts]{xts}} object with columns named \code{Open}, \code{High}, \code{Low}, \code{Close}. #' @param width integer width of the rolling window to use, or vector of endpoints defining the intervals to use. By default, the whole time series is used to compute a single spread estimate. #' @param method the estimator(s) to use. See details. -#' @param sign whether signed estimates should be returned. -#' @param na.rm whether missing values should be ignored. +#' @param sign whether to return signed estimates. +#' @param na.rm whether to ignore missing values. #' #' @return Time series of spread estimates. A value of 0.01 corresponds to a spread of 1\%. #' -#' @note -#' Please cite Ardia, Guidotti, & Kroencke (2024) -#' when using this package in publication. -#' #' @references #' Ardia, D., Guidotti, E., Kroencke, T.A. (2024). Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices. Journal of Financial Economics, 161, 103916. #' \doi{10.1016/j.jfineco.2024.103916} @@ -68,7 +64,7 @@ spread <- function(x, width = nrow(x), method = "EDGE", sign = FALSE, na.rm = FA stop("x must be a xts object") if(nrow(x) < 3) - stop("x contains less than 3 observations") + stop("x must contain at least 3 observations") method <- toupper(method) colnames(x) <- toupper(gsub("^(.*\\b)(Open|High|Low|Close)$", "\\2", colnames(x))) diff --git a/r/R/utils.R b/r/R/utils.R index ebead6a..26a129c 100644 --- a/r/R/utils.R +++ b/r/R/utils.R @@ -11,6 +11,11 @@ NULL #' rsum <- function(x, width, na.rm){ + if(length(width) == 1 && width < 1){ + width <- 1 + x[] <- NaN + } + if(length(width) == 1 && width == nrow(x)) width <- c(0, width) @@ -27,6 +32,11 @@ rsum <- function(x, width, na.rm){ #' rmean <- function(x, width, na.rm){ + if(length(width) == 1 && width < 1){ + width <- 1 + x[] <- NaN + } + if(length(width) == 1 && width == nrow(x)) width <- c(0, width) diff --git a/r/README.md b/r/README.md index f639bd0..e41c41d 100644 --- a/r/README.md +++ b/r/README.md @@ -1,40 +1,40 @@ # Efficient Estimation of Bid-Ask Spreads from Open, High, Low, and Close Prices -Implements an efficient estimator of bid-ask spreads from open, high, low, and close prices. +Implements the efficient estimator of bid-ask spreads from open, high, low, and close prices described in Ardia, Guidotti, & Kroencke (JFE, 2024): [https://doi.org/10.1016/j.jfineco.2024.103916](https://doi.org/10.1016/j.jfineco.2024.103916) ## Installation -Install this package with: - ```R install.packages("bidask") ``` ## Usage -Load the library: +There are three functions in this package. The function `edge` computes a single bid-ask spread estimate from vectors of open, high, low, and close prices. The function `spread` is optimized for fast calculations over rolling windows, and implements additional estimators. The function `sim` simulates a time series of open, high, low, and close prices. The full [documentation](https://CRAN.R-project.org/package=bidask/bidask.pdf) is available on [CRAN](https://cran.r-project.org/package=bidask). A [vignette](https://cran.r-project.org/package=bidask/vignettes/bidask.html) is also available. ```R library("bidask") ``` -Arguments: +### Function `edge` + +The input prices must be sorted in ascending order of the timestamp. The output value is the spread estimate. A value of 0.01 corresponds to a spread of 1%. ```R edge(open, high, low, close, sign=FALSE) ``` -| field | description | -| ------- | ------------------------------------------- | -| `open` | Numeric vector of open prices | -| `high` | Numeric vector of high prices | -| `low` | Numeric vector of low prices | -| `close` | Numeric vector of close prices | -| `sign` | Whether signed estimates should be returned | +| field | description | +| ------- | ----------------------------------- | +| `open` | Numeric vector of open prices. | +| `high` | Numeric vector of high prices. | +| `low` | Numeric vector of low prices. | +| `close` | Numeric vector of close prices. | +| `sign` | Whether to return signed estimates. | -The input prices must be sorted in ascending order of the timestamp. +### Functions `spread` and `sim` -The output value is the spread estimate. A value of 0.01 corresponds to a spread of 1%. +For more information about these functions, see the [documentation](https://CRAN.R-project.org/package=bidask/bidask.pdf). ## Example diff --git a/r/man/EDGE.Rd b/r/man/EDGE.Rd index 38670e9..ec2ac01 100644 --- a/r/man/EDGE.Rd +++ b/r/man/EDGE.Rd @@ -15,23 +15,19 @@ edge(open, high, low, close, sign = FALSE) \item{close}{numeric vector of close prices.} -\item{sign}{whether signed estimates should be returned.} +\item{sign}{whether to return signed estimates.} } \value{ The spread estimate. A value of 0.01 corresponds to a spread of 1\%. } \description{ -Implements an efficient estimator of bid-ask spreads -from open, high, low, and close prices as described in -Ardia, Guidotti, & Kroencke (2024). +Implements the efficient estimator of bid-ask spreads from open, high, low, +and close prices described in Ardia, Guidotti, & Kroencke (JFE, 2024): +\doi{10.1016/j.jfineco.2024.103916} } \details{ Prices must be sorted in ascending order of the timestamp. } -\note{ -Please cite Ardia, Guidotti, & Kroencke (2024) -when using this package in publication. -} \examples{ # simulate open, high, low, and close prices with spread 1\% x <- sim(spread = 0.01) diff --git a/r/man/spread.Rd b/r/man/spread.Rd index 4b99e03..e90929a 100644 --- a/r/man/spread.Rd +++ b/r/man/spread.Rd @@ -13,9 +13,9 @@ spread(x, width = nrow(x), method = "EDGE", sign = FALSE, na.rm = FALSE) \item{method}{the estimator(s) to use. See details.} -\item{sign}{whether signed estimates should be returned.} +\item{sign}{whether to return signed estimates.} -\item{na.rm}{whether missing values should be ignored.} +\item{na.rm}{whether to ignore missing values.} } \value{ Time series of spread estimates. A value of 0.01 corresponds to a spread of 1\%. @@ -36,10 +36,6 @@ The method \code{CS} implements the estimator described in Corwin & Schultz (201 The method \code{ROLL} implements the estimator described in Roll (1984). } -\note{ -Please cite Ardia, Guidotti, & Kroencke (2024) -when using this package in publication. -} \examples{ # simulate open, high, low, and close prices with spread 1\% x <- sim(spread = 0.01) diff --git a/r/tests/testthat/test-edge.R b/r/tests/testthat/test-edge.R index 1950bc6..7d9c489 100644 --- a/r/tests/testthat/test-edge.R +++ b/r/tests/testthat/test-edge.R @@ -33,13 +33,16 @@ test_that("edge-rolling", { set.seed(123) x <- sim(prob = 0.01) - width <- 21 - s1 <- spread(x, width = width, method = "EDGE") - s2 <- zoo::rollapplyr(x, width = width, by.column = FALSE, FUN = function(x){ - edge(x$Open, x$High, x$Low, x$Close) - })[-(1:width-1)] - - expect_equal(as.numeric(s1), as.numeric(s2)) + for(width in c(1, 2, 3, 4, 21, 100)){ + + s1 <- spread(x, width = width, method = "EDGE") + s2 <- zoo::rollapplyr(x, width = width, by.column = FALSE, FUN = function(x){ + edge(x$Open, x$High, x$Low, x$Close) + })[-(1:max(1, width-1))] + + expect_equal(as.numeric(s1), as.numeric(s2), label=paste("width = ", width)) + + } })