Investment evaluation as measure of forecast success

We want to see whether certain forecasts perform better than random ones in making an investment decision on stocks.

First we define 2 functions which invest when we predict a rise and calculate the winnings.

library(forecast)

## Loading required package: RcppArmadillo

## Loading required package: Rcpp

## Loading required package: zoo

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

## Loading required package: timeDate

## Loading required package: tseries

## Loading required package: quadprog

## Loading required package: fracdiff

## Loading required package: nnet

## Loading required package: colorspace

## Loading required package: parallel

## Loading required package: ggplot2

## Loading required package: digest

## Loading required package: grid

## Loading required package: gtable

## Loading required package: MASS

## Loading required package: plyr

## Loading required package: reshape2

## Loading required package: stringr

## Loading required package: stringi

## Loading required package: magrittr

## Loading required package: lattice

## Loading required package: scales

## Loading required package: RColorBrewer

## Loading required package: dichromat

## Loading required package: munsell

## 
## Attaching package: 'munsell'

## The following object is masked from 'package:colorspace':
## 
##     desaturate

## Loading required package: labeling

## Loading required package: tibble

## Loading required package: assertthat

## Loading required package: lazyeval

## 
## Attaching package: 'tibble'

## The following object is masked from 'package:assertthat':
## 
##     has_name

## This is forecast 7.2

invest_eval <- function(last, pred, actual) {
  # if forecast > last invest, calc the return (sell next period)
  if (pred > last) {
    return(actual - last)
  } else {
    return(0)
  }
}

wins <- function(x, p, idx) {
  stopifnot(max(idx) <= length(x))
  w <- sapply(idx, function(i) {
    invest_eval(x[i - 1], p[i], x[i])
  })
  return(w)
}

set.seed(100)
n <- 100

# 2 uncorrelated random series
xrand <- rnorm(n, 10, 1)
prand <- rnorm(n, 10, 1)
cor(xrand, prand)

## [1] -0.1350806

# 2 uncorrelated trended series
xtrend <- 1:n / 5 + rnorm(n, 0, 1)
ptrend <- 1:n / 5 + rnorm(n, 0, 1)
cor(xtrend, ptrend)

## [1] 0.9651826

# 2 process with same structure but not related
xar1 <- 10 + arima.sim(list(ar = .8), n)
par1 <- 10 + arima.sim(list(ar = .8), n)
cor(xar1, par1)

## [1] -0.2807578

# auto.arima forecast
par1f <- sapply(11:n, function(i) {
  # 1:10 -> 11
  forecast(auto.arima(xar1[1:(i - 1)]), 1)$mean
})
par1f <- c(rep(NA, 10), par1f)

# theta forecast
par1f2 <- sapply(11:n, function(i) {
  # 1:10 -> 11
  thetaf(xar1[1:(i - 1)], 1)$mean
})
par1f2 <- c(rep(NA, 10), par1f2)

# naive oppose the sign 
par1f3 <- sapply(11:n, function(i) {
  # oppose the last change
  xar1[i - 1] - sign(xar1[i - 1] - xar1[i - 2])
})
par1f3 <- c(rep(NA, 10), par1f3)

cor(xar1, par1, use = 'compl')

## [1] -0.2807578

cor(xar1, par1f, use = 'compl')

## [1] 0.75465

cor(xar1, par1f2, use = 'compl')

## [1] 0.7606652

cor(xar1, par1f3, use = 'compl')

## [1] 0.6945875

# calc wins
wrand <- wins(xrand, prand, 11:n)
wtrend <- wins(xtrend, ptrend, 11:n)
war1 <- wins(xar1, par1, 11:n)
war1f <- wins(xar1, par1f, 11:n)
war1f2 <- wins(xar1, par1f2, 11:n)
war1f3 <- wins(xar1, par1f3, 11:n)

matplot(cbind(xrand, prand), type = 'l', main = 'random')

matplot(cbind(xtrend, ptrend), type = 'l', main = 'trend')

matplot(cbind(xar1, par1, par1f, par1f2, par1f3), type = 'l', main = 'AR1')
legend('bottomleft', c('ar1', 'random', 'auto.arima', 'thetaf', 'opp sign'), lty = 1, col = 1:5)

# plot cum wins
plot(cumsum(wrand), type = 'l')

plot(cumsum(wtrend), type = 'l')

matplot(cbind(cumsum(war1), cumsum(war1f), cumsum(war1f2), cumsum(war1f3)), type = 'l')
legend('bottomleft', c('random', 'auto.arima', 'thetaf', 'opp sign'), lty = 1, col = 1:4)

# sum wins
sum(wrand)

## [1] 36.48024

sum(wtrend)

## [1] 34.05566

sum(war1)

## [1] 7.883858

sum(war1f)

## [1] 6.148303

sum(war1f2)

## [1] 4.549689

sum(war1f3)

## [1] 4.27178

# what is structure of random and trended series
summary(auto.arima(xrand))

## Series: xrand 
## ARIMA(1,0,0) with non-zero mean 
## 
## Coefficients:
##           ar1  intercept
##       -0.2674    10.0065
## s.e.   0.0966     0.0774
## 
## sigma^2 estimated as 0.9769:  log likelihood=-139.75
## AIC=285.51   AICc=285.76   BIC=293.32
## 
## Training set error measures:
##                        ME     RMSE       MAE        MPE    MAPE      MASE
## Training set -0.001174902 0.978472 0.7524711 -0.9805923 7.63519 0.5873766
##                      ACF1
## Training set -0.001030692

summary(auto.arima(xtrend))

## Series: xtrend 
## ARIMA(5,1,0) with drift         
## 
## Coefficients:
##           ar1      ar2      ar3      ar4      ar5   drift
##       -0.9016  -0.8993  -0.7523  -0.3595  -0.2340  0.1997
## s.e.   0.0984   0.1295   0.1396   0.1306   0.0993  0.0267
## 
## sigma^2 estimated as 1.248:  log likelihood=-149.15
## AIC=312.31   AICc=313.54   BIC=330.47
## 
## Training set error measures:
##                       ME     RMSE      MAE       MPE     MAPE      MASE
## Training set 0.005208327 1.077424 0.854348 -12.84862 30.82583 0.7129948
##                     ACF1
## Training set -0.03173645

Note that all series have mean of 10, so wins are kind of comparable. Using uncorrelated data for random and trended data, gives positive returns.

For an AR(1) the random process gives the best returns, 3 forecast methods give worse returns.

# now try with real data

library(quantmod)

## Loading required package: xts

## Loading required package: TTR

## Version 0.4-0 included new data defaults. See ?getSymbols.

set.seed(100)
getSymbols('DIS')

##     As of 0.4-0, 'getSymbols' uses env=parent.frame() and
##  auto.assign=TRUE by default.
## 
##  This  behavior  will be  phased out in 0.5-0  when the call  will
##  default to use auto.assign=FALSE. getOption("getSymbols.env") and 
##  getOptions("getSymbols.auto.assign") are now checked for alternate defaults
## 
##  This message is shown once per session and may be disabled by setting 
##  options("getSymbols.warning4.0"=FALSE). See ?getSymbols for more details.

## [1] "DIS"

plot(DIS)

## Warning in plot.xts(DIS): only the univariate series will be plotted

x2 <- as.data.frame(DIS)[1:n, 4]

summary(auto.arima(x2))

## Series: x2 
## ARIMA(1,1,1)                    
## 
## Coefficients:
##          ar1      ma1
##       0.5898  -0.8354
## s.e.  0.1436   0.0941
## 
## sigma^2 estimated as 0.1415:  log likelihood=-42.8
## AIC=91.6   AICc=91.85   BIC=99.38
## 
## Training set error measures:
##                     ME      RMSE       MAE       MPE      MAPE      MASE
## Training set 0.0445712 0.3704807 0.2589162 0.1177053 0.7435187 0.9297317
##                     ACF1
## Training set 0.005747877

# a unrelated process with same structure
parma11 <- mean(x2) + arima.sim(list(order = c(1, 1, 1), ar = .59, ma = -.84), n, sd = sd(x2))[1:n]
matplot(cbind(x2, parma11), type = 'l')
legend('bottomleft', c('DIS', 'ARIMA(1,1,1)'), lty = 1, col = 1:2)

px2f <- sapply(11:n, function(i) {
  # 1:10 -> 11
  forecast(auto.arima(x2[1:(i - 1)]), 1)$mean
})
px2f <- c(rep(NA, 10), px2f)

px2f2 <- sapply(11:n, function(i) {
  # 1:10 -> 11
  thetaf(x2[1:(i - 1)], 1)$mean
})
px2f2 <- c(rep(NA, 10), px2f2)

px2f3 <- sapply(11:n, function(i) {
  # oppose the last change
  x2[i - 1] - sign(x2[i - 1] - x2[i - 2])
})
px2f3 <- c(rep(NA, 10), px2f3)

cor(x2, parma11, use = 'compl')

## [1] -0.2755307

cor(x2, px2f, use = 'compl')

## [1] 0.8332456

cor(x2, px2f2, use = 'compl')

## [1] 0.8296683

cor(x2, px2f3, use = 'compl')

## [1] 0.4018015

matplot(cbind(x2, parma11, px2f, px2f2, px2f3), type = 'l')
legend('bottomleft', c('actual', 'random', 'auto.arima', 'thetaf', 'opp sign'), lty = 1, col = 1:5)

wx2 <- wins(x2, parma11, 11:n)
wx2f <- wins(x2, px2f, 11:n)
wx2f2 <- wins(x2, px2f2, 11:n)
wx2f3 <- wins(x2, px2f3, 11:n)

matplot(cbind(cumsum(wx2), cumsum(wx2f), cumsum(wx2f2), cumsum(wx2f3)), type = 'l')
legend('bottomleft', c('random', 'auto.arima', 'thetaf', 'opp sign'), lty = 1, col = 1:4)

sum(wx2)

## [1] 3.00999

sum(wx2f)

## [1] 2.669996

sum(wx2f2)

## [1] 0.079993

sum(wx2f3)

## [1] -0.310013

The random process and the Arima forecast have positive returns. Sign forecast has negative returns.