Skip to content

Commit

Permalink
Implemented fix for out of bounds script and incorrect trading period…
Browse files Browse the repository at this point in the history
… suggested by N0talent - see issue braverock#90 braverock#90
  • Loading branch information
randomjohn committed Nov 3, 2018
1 parent be01b35 commit 436f507
Showing 1 changed file with 67 additions and 57 deletions.
124 changes: 67 additions & 57 deletions R/walk.forward.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# R (http://r-project.org/) Quantitative Strategy Model Framework
#
# Copyright (c) 2009-2015
# Peter Carl, Dirk Eddelbuettel, Brian G. Peterson, Jeffrey Ryan, and Joshua Ulrich
# Peter Carl, Dirk Eddelbuettel, Brian G. Peterson, Jeffrey Ryan, and Joshua Ulrich
#
# This library is distributed under the terms of the GNU Public License (GPL)
# for full details see the file COPYING
Expand All @@ -16,7 +16,7 @@
###############################################################################

#' Rolling Walk Forward Analysis
#'
#'
#' A wrapper for \code{\link{apply.paramset}} and \code{\link{applyStrategy}},
#' implementing a Rolling Walk Forward Analysis (WFA).
#'
Expand All @@ -28,30 +28,30 @@
#' following the training window (testing window). Once completed, the training
#' window is shifted forward by a time period equal to the testing window size,
#' and the process is repeated. The final testing window may be shorter than the
#' full testing window, if the length of the time series does not allow a full
#' full testing window, if the length of the time series does not allow a full
#' testing window.
#'
#'
#' 'anchored' walk forward forces all training windows to start on the first
#' observation of the market data. This can be useful if the indicators make
#' use of all the data, e.g. for a risk metric such as a volatility estimator,
#' for a regime model, or for a long-memory process of some sort. If
#' \code{anchored=TRUE} and you have specified \code{k.training}, then the
#' performance of each paramset will only be evaluated on the rolling training
#' window, even though larger (anchored) periods are used for input calculations.
#' performance of each paramset will only be evaluated on the rolling training
#' window, even though larger (anchored) periods are used for input calculations.
#'
#' Note that walk.forward will generate out of sample (OOS) transactions using
#' the chosen parameter set into the portfolio designated by portfolio.st. So
#' walk.forward shoud be supplied with a 'clean' portfolio environment to avoid
#' issues such as out of order transactions.
#' walk.forward shoud be supplied with a 'clean' portfolio environment to avoid
#' issues such as out of order transactions.
#'
#' The \code{psgc} argument is a tradeoff between memory efficiency and speed.
#' \R does garbage collection promarily when it is running low on RAM, but this
#' \R does garbage collection promarily when it is running low on RAM, but this
#' automatic detection works poorly in parallel processes. If TRUE, the default,
#' \code{walk.proward} and \code{\link{apply.paramset}} will call \code{gc()}
#' at key points to limit RAM usage. For small tests, this is probably
#' at key points to limit RAM usage. For small tests, this is probably
#' unecessary and will only slow the test. For large tests, even on substantial
#' hardware, it may be the difference between completing the test and crashing \R.
#'
#'
#' @param portfolio.st the name of the portfolio object
#' @param account.st the name of the account object
#' @param strategy.st the name of the strategy object
Expand All @@ -70,15 +70,15 @@
#' @param savewf boolean, default FALSE. if TRUE, saves audit information on training and testing periods to working directory for later analysis
#' @param saveenv boolean, default FALSE. if TRUE, save the paramset environment information for each trial, and not just the tradeStats and chosen paramset
#' @param psgc boolean, if TRUE, the default, run gc() in each worker session to conserve RAM.
#'
#'
#' @return a list consisting of a slot containing detailed results for each training + testing period, as well as the portfolio and the tradeStats() for the portfolio
#'
#' @references Tomasini, E. and Jaekle, U. Trading Systems. 2009. Chapter 6
#' @seealso
#' @seealso
#' \code{\link{applyStrategy}} ,
#' \code{\link{apply.paramset}} ,
#' \code{\link{chart.forward}} ,
#' \code{\link{chart.forward.training}} ,
#' \code{\link{chart.forward.training}} ,
#' \code{\link{endpoints}} ,
#' \code{\link[blotter]{tradeStats}}
#'
Expand Down Expand Up @@ -124,23 +124,33 @@ walk.forward <- function( strategy.st
total.timespan <- paste(index(symbol.data[total.start]), '', sep='/', index(last(symbol.data)))

# construct the subsets to use for training/testing
training.end.v <- ep[c(k.training,k.training+cumsum(rep(k.testing,as.integer((length(ep)-k.training)/k.testing))))]
if( is.na(last(training.end.v)) ) {
training.end.v <- training.end.v[-length(training.end.v)]
}

training.start.v <- c(1,1+ep[cumsum(rep(k.testing,as.integer((length(ep)-k.training)/k.testing)))])

#define first Training interval
first.training.end<-1+k.training
#Calculate how many complete training periods are in the Dataset.
len<-length(ep[-length(ep)]) #Removed last period, because ep[nrow(ep)]-ep[nrow(ep)-1] isnt always a full period (e.g.period = 'months'= 01.01-01.15)
trainingperiods.total<-as.integer((len-first.training.end)/k.testing)
training.steps<-rep(k.testing,trainingperiods.total)

#construct index for ep
ii<-first.training.end+cumsum(training.steps)
i<-c(first.training.end,ii)
training.end.v <- ep[i]

#construct Training starting points by subtracting k.training from calculated training endpoints
first.training.start<-1
i<-ii-k.training
training.start.v <- c(first.training.start,ep[i])

if(anchored || anchored=='anchored' || anchored=='rolling.subset'){
perf.start.v <- training.start.v
perf.start <- index(symbol.data[training.start.v])
} else {
perf.start <- perf.start.v <- rep(NA,length(training.start.v))
}

testing.start.v <- 1+training.end.v
testing.end.v <- c(training.end.v[-1],last(ep))

training.start <- index(symbol.data[training.start.v])
if(anchored || anchored=='anchored' || anchored=='rolling.subset'){
training.start.v <- rep(1,length(training.start.v))
Expand All @@ -149,7 +159,7 @@ walk.forward <- function( strategy.st
training.end <- index(symbol.data[training.end.v])
testing.start <- index(symbol.data[testing.start.v])
testing.end <- index(symbol.data[testing.end.v])

wf.subsets <- data.frame( training.start=training.start
, training.end=training.end
, testing.start=testing.start
Expand All @@ -166,7 +176,7 @@ walk.forward <- function( strategy.st

# set up our control variables
old.param.combo <- NULL

# now loop over training and testing periods, collecting output
# do the traditional rolling method, computationally expensive
for(i in 1:nrow(wf.subsets))
Expand All @@ -177,7 +187,7 @@ walk.forward <- function( strategy.st
} else {
.audit=NULL
}

training.timespan <- paste(wf.subsets[i,'training.start'], wf.subsets[i,'training.end'], sep='/')
testing.timespan <- paste(wf.subsets[i,'testing.start'], wf.subsets[i,'testing.end'], sep='/')

Expand All @@ -187,16 +197,16 @@ walk.forward <- function( strategy.st
} else {
perf.subset <- training.timespan
}

t.start <- wf.subsets[i,'training.start']
t.end <- wf.subsets[i,'training.end']

result$training.timespan <- training.timespan
result$testing.timespan <- testing.timespan

print(paste('=== training', paramset.label, 'on', training.timespan))


# run backtests on training window
result$apply.paramset <- apply.paramset( strategy.st=strategy.st
, paramset.label=paramset.label
Expand All @@ -211,14 +221,14 @@ walk.forward <- function( strategy.st
, perf.subset=perf.subset
, ...=...
)

tradeStats.list <- result$apply.paramset$tradeStats

if(!missing(k.testing) && k.testing>0)
{
if(!is.function(obj.func))
stop(paste(obj.func, 'unknown obj function', sep=': '))

# select best param.combo
param.combo.idx <- try(do.call(obj.func, obj.args))
if(length(param.combo.idx) == 0 || class(param.combo.idx)=="try-error"){
Expand All @@ -233,37 +243,37 @@ walk.forward <- function( strategy.st
}
} else {
if(length(param.combo.idx)>1){
# choose the last row because expand.grid in paramsets will make
# the last row the row with the largest parameter values, roughly
# equivalent to highest stability of data usage,
# choose the last row because expand.grid in paramsets will make
# the last row the row with the largest parameter values, roughly
# equivalent to highest stability of data usage,
# or lowest degrees of freedom
param.combo.idx <- last(param.combo.idx)
}
param.combo <- tradeStats.list[param.combo.idx, 1:grep('Portfolio', names(tradeStats.list)) - 1]
param.combo.nr <- row.names(tradeStats.list)[param.combo.idx]
}

old.param.combo<-param.combo

result$testing.param.combo <- param.combo
result$testing.param.combo.idx <- param.combo.idx

if(!is.null(.audit))
{
assign('obj.func', obj.func, envir=.audit)
assign('param.combo.idx', param.combo.idx, envir=.audit)
assign('param.combo.nr', param.combo.nr, envir=.audit)
assign('param.combo', param.combo, envir=.audit)
}
}

# configure strategy to use selected param.combo
strategy <- install.param.combo(strategy, param.combo, paramset.label)

result$testing.timespan <- testing.timespan

print(paste('=== testing param.combo', param.combo.nr, 'on', testing.timespan))
print(param.combo)

# run backtest using selected param.combo
# NOTE, this will generate OOS transactions in the portfolio identified,
# so strart with a clean portfolio environment.
Expand All @@ -281,11 +291,11 @@ walk.forward <- function( strategy.st
iso.format <- "%Y%m%dT%H%M%S"
time.range <- paste(format(index(symbol.data[t.start]), iso.format),
format(index(symbol.data[t.end]), iso.format), sep=".")

if(!is.null(.audit) && !is.null(audit.prefix)){

result$audit <- .audit

if(savewf){
filestr<-paste(audit.prefix, symbol.st, time.range, "RData", sep=".")
if(verbose) cat('Saving .audit env in file: ',filestr,'\n')
Expand All @@ -296,19 +306,19 @@ walk.forward <- function( strategy.st
if(include.insamples){
results[[time.range]] <- result
}

} # end full rolling training/testing loop

if(include.insamples){
# run apply.paramset on the entire period
if(!is.null(.audit)){
# only keep the debug auditing information if we are
# only keep the debug auditing information if we are
# keeping it for the rest of the simulation
.insampleaudit <- new.env()
} else {
.insampleaudit <- NULL
}
results$insample.apply.paramset <-
results$insample.apply.paramset <-
apply.paramset( strategy.st=strategy.st
, paramset.label=paramset.label
, portfolio.st=portfolio.st
Expand All @@ -335,12 +345,12 @@ walk.forward <- function( strategy.st
results$blotter <- .blotter
results$strategy <- .strategy
results$wf.subsets <- wf.subsets

results$portfolio.st <- portfolio.st

results$testing.parameters <- NULL
for (tp in ls(pattern='*.[0-9]+',pos=results)){
tr <- cbind(results[[tp]][['testing.param.combo']],
tr <- cbind(results[[tp]][['testing.param.combo']],
results[[tp]][['testing.timespan']])
if(is.null(results$testing.parameters)){
results$testing.parameters <- tr
Expand All @@ -349,17 +359,17 @@ walk.forward <- function( strategy.st
}
}
colnames(results$testing.parameters)[ncol(results$testing.parameters)] <- 'testing.timespan'

if(!is.null(.audit) && !is.null(audit.prefix))
{
results$audit <- .audit
}

if(savewf){
filestr<-paste(audit.prefix, symbol.st, time.range,"Results","RData", sep=".")
cat('\n','Saving final results env in file: ',filestr,'\n')
save(results, file = filestr)
}

return(results)
}

0 comments on commit 436f507

Please sign in to comment.