Implemented fix for out of bounds script and incorrect trading period…

… suggested by N0talent - see issue braverock#90 braverock#90
randomjohn · Nov 3, 2018 · 436f507 · 436f507
1 parent be01b35
commit 436f507
Showing 1 changed file with 67 additions and 57 deletions.
diff --git a/R/walk.forward.R b/R/walk.forward.R
@@ -2,7 +2,7 @@
 # R (http://r-project.org/) Quantitative Strategy Model Framework
 #
 # Copyright (c) 2009-2015
-# Peter Carl, Dirk Eddelbuettel, Brian G. Peterson, Jeffrey Ryan, and Joshua Ulrich 
+# Peter Carl, Dirk Eddelbuettel, Brian G. Peterson, Jeffrey Ryan, and Joshua Ulrich
 #
 # This library is distributed under the terms of the GNU Public License (GPL)
 # for full details see the file COPYING
@@ -16,7 +16,7 @@
 ###############################################################################
 
 #' Rolling Walk Forward Analysis
-#' 
+#'
 #' A wrapper for \code{\link{apply.paramset}} and \code{\link{applyStrategy}},
 #' implementing a Rolling Walk Forward Analysis (WFA).
 #'
@@ -28,30 +28,30 @@
 #' following the training window (testing window). Once completed, the training
 #' window is shifted forward by a time period equal to the testing window size,
 #' and the process is repeated. The final testing window may be shorter than the
-#' full testing window, if the length of the time series does not allow a full 
+#' full testing window, if the length of the time series does not allow a full
 #' testing window.
-#' 
+#'
 #' 'anchored' walk forward forces all training windows to start on the first
 #' observation of the market data.  This can be useful if the indicators make
 #' use of all the data, e.g. for a risk metric such as a volatility estimator,
 #' for a regime model, or for a long-memory process of some sort.  If
 #' \code{anchored=TRUE} and you have specified \code{k.training}, then the
-#' performance of each paramset will only be evaluated on the rolling training 
-#' window, even though larger (anchored) periods are used for input calculations.  
+#' performance of each paramset will only be evaluated on the rolling training
+#' window, even though larger (anchored) periods are used for input calculations.
 #'
 #' Note that walk.forward will generate out of sample (OOS) transactions using
 #' the chosen parameter set into the portfolio designated by portfolio.st. So
-#' walk.forward shoud be supplied with a 'clean' portfolio environment to avoid 
-#' issues such as out of order transactions. 
+#' walk.forward shoud be supplied with a 'clean' portfolio environment to avoid
+#' issues such as out of order transactions.
 #'
 #' The \code{psgc} argument is a tradeoff between memory efficiency and speed.
-#' \R does garbage collection promarily when it is running low on RAM, but this 
+#' \R does garbage collection promarily when it is running low on RAM, but this
 #' automatic detection works poorly in parallel processes.  If TRUE, the default,
 #' \code{walk.proward} and \code{\link{apply.paramset}} will call \code{gc()}
-#' at key points to limit RAM usage.  For small tests, this is probably 
+#' at key points to limit RAM usage.  For small tests, this is probably
 #' unecessary and will only slow the test.  For large tests, even on substantial
 #' hardware, it may be the difference between completing the test and crashing \R.
-#'   
+#'
 #' @param portfolio.st the name of the portfolio object
 #' @param account.st the name of the account object
 #' @param strategy.st the name of the strategy object
@@ -70,15 +70,15 @@
 #' @param savewf boolean, default FALSE. if TRUE, saves audit information on training and testing periods to working directory for later analysis
 #' @param saveenv boolean, default FALSE. if TRUE, save the paramset environment information for each trial, and not just the tradeStats and chosen paramset
 #' @param psgc boolean, if TRUE, the default, run gc() in each worker session to conserve RAM.
-#'  
+#'
 #' @return a list consisting of a slot containing detailed results for each training + testing period, as well as the portfolio and the tradeStats() for the portfolio
 #'
 #' @references Tomasini, E. and Jaekle, U. Trading Systems. 2009. Chapter 6
-#' @seealso 
+#' @seealso
 #'     \code{\link{applyStrategy}} ,
 #'     \code{\link{apply.paramset}} ,
 #'     \code{\link{chart.forward}} ,
-#'     \code{\link{chart.forward.training}} ,  
+#'     \code{\link{chart.forward.training}} ,
 #'     \code{\link{endpoints}} ,
 #'     \code{\link[blotter]{tradeStats}}
 #'
@@ -124,23 +124,33 @@ walk.forward <- function(  strategy.st
     total.timespan <- paste(index(symbol.data[total.start]), '', sep='/', index(last(symbol.data)))
 
     # construct the subsets to use for training/testing
-    training.end.v   <- ep[c(k.training,k.training+cumsum(rep(k.testing,as.integer((length(ep)-k.training)/k.testing))))]
-    if( is.na(last(training.end.v)) ) {
-      training.end.v <- training.end.v[-length(training.end.v)]
-    }
-
-    training.start.v <- c(1,1+ep[cumsum(rep(k.testing,as.integer((length(ep)-k.training)/k.testing)))])
-
+    #define first Training interval
+    first.training.end<-1+k.training
+    #Calculate how many complete training periods are in the Dataset.
+    len<-length(ep[-length(ep)])    #Removed last period, because ep[nrow(ep)]-ep[nrow(ep)-1] isnt always a full period (e.g.period = 'months'= 01.01-01.15)
+    trainingperiods.total<-as.integer((len-first.training.end)/k.testing)
+    training.steps<-rep(k.testing,trainingperiods.total)
+
+    #construct index for ep
+    ii<-first.training.end+cumsum(training.steps)
+    i<-c(first.training.end,ii)
+    training.end.v   <- ep[i]
+
+    #construct Training starting points by subtracting k.training from calculated training endpoints
+    first.training.start<-1
+    i<-ii-k.training
+    training.start.v <- c(first.training.start,ep[i])
+
     if(anchored || anchored=='anchored' || anchored=='rolling.subset'){
       perf.start.v     <- training.start.v
       perf.start       <- index(symbol.data[training.start.v])
     } else {
       perf.start <- perf.start.v  <- rep(NA,length(training.start.v))
     }
-    
+
     testing.start.v  <- 1+training.end.v
     testing.end.v    <- c(training.end.v[-1],last(ep))
-    
+
     training.start   <- index(symbol.data[training.start.v])
     if(anchored || anchored=='anchored' || anchored=='rolling.subset'){
       training.start.v <- rep(1,length(training.start.v))
@@ -149,7 +159,7 @@ walk.forward <- function(  strategy.st
     training.end     <- index(symbol.data[training.end.v])
     testing.start    <- index(symbol.data[testing.start.v])
     testing.end      <- index(symbol.data[testing.end.v])
-    
+
     wf.subsets       <- data.frame( training.start=training.start
                                   , training.end=training.end
                                   , testing.start=testing.start
@@ -166,7 +176,7 @@ walk.forward <- function(  strategy.st
 
     # set up our control variables
     old.param.combo <- NULL
-    
+
     # now loop over training and testing periods, collecting output
     # do the traditional rolling method, computationally expensive
     for(i in 1:nrow(wf.subsets))
@@ -177,7 +187,7 @@ walk.forward <- function(  strategy.st
       } else {
         .audit=NULL
       }
-      
+
       training.timespan <- paste(wf.subsets[i,'training.start'], wf.subsets[i,'training.end'], sep='/')
       testing.timespan  <- paste(wf.subsets[i,'testing.start'], wf.subsets[i,'testing.end'], sep='/')
 
@@ -187,16 +197,16 @@ walk.forward <- function(  strategy.st
       } else {
         perf.subset <- training.timespan
       }
-      
+
       t.start <- wf.subsets[i,'training.start']
       t.end   <- wf.subsets[i,'training.end']
-      
+
       result$training.timespan <- training.timespan
       result$testing.timespan  <- testing.timespan
-      
+
       print(paste('=== training', paramset.label, 'on', training.timespan))
-      
-      
+
+
       # run backtests on training window
       result$apply.paramset <- apply.paramset( strategy.st=strategy.st
                                              , paramset.label=paramset.label
@@ -211,14 +221,14 @@ walk.forward <- function(  strategy.st
                                              , perf.subset=perf.subset
                                              , ...=...
                                              )
-      
+
       tradeStats.list <- result$apply.paramset$tradeStats
-      
+
       if(!missing(k.testing) && k.testing>0)
       {
         if(!is.function(obj.func))
           stop(paste(obj.func, 'unknown obj function', sep=': '))
-        
+
         # select best param.combo
         param.combo.idx <- try(do.call(obj.func, obj.args))
         if(length(param.combo.idx) == 0 || class(param.combo.idx)=="try-error"){
@@ -233,37 +243,37 @@ walk.forward <- function(  strategy.st
           }
         } else {
           if(length(param.combo.idx)>1){
-            # choose the last row because expand.grid in paramsets will make 
-            # the last row the row with the largest parameter values, roughly 
-            # equivalent to highest stability of data usage, 
+            # choose the last row because expand.grid in paramsets will make
+            # the last row the row with the largest parameter values, roughly
+            # equivalent to highest stability of data usage,
             # or lowest degrees of freedom
             param.combo.idx <- last(param.combo.idx)
           }
           param.combo <- tradeStats.list[param.combo.idx, 1:grep('Portfolio', names(tradeStats.list)) - 1]
           param.combo.nr <- row.names(tradeStats.list)[param.combo.idx]
         }
-        
+
         old.param.combo<-param.combo
-        
+
         result$testing.param.combo <- param.combo
         result$testing.param.combo.idx <- param.combo.idx
-        
+
         if(!is.null(.audit))
         {
           assign('obj.func', obj.func, envir=.audit)
           assign('param.combo.idx', param.combo.idx, envir=.audit)
           assign('param.combo.nr', param.combo.nr, envir=.audit)
           assign('param.combo', param.combo, envir=.audit)
-        } 
-        
+        }
+
         # configure strategy to use selected param.combo
         strategy <- install.param.combo(strategy, param.combo, paramset.label)
-        
+
         result$testing.timespan <- testing.timespan
-        
+
         print(paste('=== testing param.combo', param.combo.nr, 'on', testing.timespan))
         print(param.combo)
-        
+
         # run backtest using selected param.combo
         # NOTE, this will generate OOS transactions in the portfolio identified,
         # so strart with a clean portfolio environment.
@@ -281,11 +291,11 @@ walk.forward <- function(  strategy.st
       iso.format <- "%Y%m%dT%H%M%S"
       time.range <- paste(format(index(symbol.data[t.start]), iso.format),
                           format(index(symbol.data[t.end]), iso.format), sep=".")
-      
+
       if(!is.null(.audit) && !is.null(audit.prefix)){
-        
+
         result$audit      <- .audit
-        
+
         if(savewf){
           filestr<-paste(audit.prefix, symbol.st, time.range, "RData", sep=".")
           if(verbose) cat('Saving .audit env in file: ',filestr,'\n')
@@ -296,19 +306,19 @@ walk.forward <- function(  strategy.st
       if(include.insamples){
         results[[time.range]] <- result
       }
-      
+
     } # end full rolling training/testing loop
 
     if(include.insamples){
       # run apply.paramset on the entire period
       if(!is.null(.audit)){
-        # only keep the debug auditing information if we are 
+        # only keep the debug auditing information if we are
         # keeping it for the rest of the simulation
         .insampleaudit <- new.env()
       } else {
         .insampleaudit <- NULL
       }
-      results$insample.apply.paramset <- 
+      results$insample.apply.paramset <-
         apply.paramset( strategy.st=strategy.st
                         , paramset.label=paramset.label
                         , portfolio.st=portfolio.st
@@ -335,12 +345,12 @@ walk.forward <- function(  strategy.st
     results$blotter    <- .blotter
     results$strategy   <- .strategy
     results$wf.subsets <- wf.subsets
-    
+
     results$portfolio.st <- portfolio.st
-    
+
     results$testing.parameters <- NULL
     for (tp in ls(pattern='*.[0-9]+',pos=results)){
-      tr <- cbind(results[[tp]][['testing.param.combo']], 
+      tr <- cbind(results[[tp]][['testing.param.combo']],
                   results[[tp]][['testing.timespan']])
       if(is.null(results$testing.parameters)){
         results$testing.parameters <- tr
@@ -349,17 +359,17 @@ walk.forward <- function(  strategy.st
       }
     }
     colnames(results$testing.parameters)[ncol(results$testing.parameters)] <- 'testing.timespan'
-    
+
     if(!is.null(.audit) && !is.null(audit.prefix))
     {
       results$audit      <- .audit
     }
-    
+
     if(savewf){
       filestr<-paste(audit.prefix, symbol.st, time.range,"Results","RData", sep=".")
       cat('\n','Saving final results env in file: ',filestr,'\n')
       save(results, file = filestr)
     }
-    
+
     return(results)
 }