We want to see whether certain forecasts perform better than random ones in making an investment decision on stocks.
First we define 2 functions which invest when we predict a rise and calculate the winnings.
library(forecast)
## Loading required package: RcppArmadillo
## Loading required package: Rcpp
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: timeDate
## Loading required package: tseries
## Loading required package: quadprog
## Loading required package: fracdiff
## Loading required package: nnet
## Loading required package: colorspace
## Loading required package: parallel
## Loading required package: ggplot2
## Loading required package: digest
## Loading required package: grid
## Loading required package: gtable
## Loading required package: MASS
## Loading required package: plyr
## Loading required package: reshape2
## Loading required package: stringr
## Loading required package: stringi
## Loading required package: magrittr
## Loading required package: lattice
## Loading required package: scales
## Loading required package: RColorBrewer
## Loading required package: dichromat
## Loading required package: munsell
##
## Attaching package: 'munsell'
## The following object is masked from 'package:colorspace':
##
## desaturate
## Loading required package: labeling
## Loading required package: tibble
## Loading required package: assertthat
## Loading required package: lazyeval
##
## Attaching package: 'tibble'
## The following object is masked from 'package:assertthat':
##
## has_name
## This is forecast 7.2
invest_eval <- function(last, pred, actual) {
# if forecast > last invest, calc the return (sell next period)
if (pred > last) {
return(actual - last)
} else {
return(0)
}
}
wins <- function(x, p, idx) {
stopifnot(max(idx) <= length(x))
w <- sapply(idx, function(i) {
invest_eval(x[i - 1], p[i], x[i])
})
return(w)
}
set.seed(100)
n <- 100
# 2 uncorrelated random series
xrand <- rnorm(n, 10, 1)
prand <- rnorm(n, 10, 1)
cor(xrand, prand)
## [1] -0.1350806
# 2 uncorrelated trended series
xtrend <- 1:n / 5 + rnorm(n, 0, 1)
ptrend <- 1:n / 5 + rnorm(n, 0, 1)
cor(xtrend, ptrend)
## [1] 0.9651826
# 2 process with same structure but not related
xar1 <- 10 + arima.sim(list(ar = .8), n)
par1 <- 10 + arima.sim(list(ar = .8), n)
cor(xar1, par1)
## [1] -0.2807578
# auto.arima forecast
par1f <- sapply(11:n, function(i) {
# 1:10 -> 11
forecast(auto.arima(xar1[1:(i - 1)]), 1)$mean
})
par1f <- c(rep(NA, 10), par1f)
# theta forecast
par1f2 <- sapply(11:n, function(i) {
# 1:10 -> 11
thetaf(xar1[1:(i - 1)], 1)$mean
})
par1f2 <- c(rep(NA, 10), par1f2)
# naive oppose the sign
par1f3 <- sapply(11:n, function(i) {
# oppose the last change
xar1[i - 1] - sign(xar1[i - 1] - xar1[i - 2])
})
par1f3 <- c(rep(NA, 10), par1f3)
cor(xar1, par1, use = 'compl')
## [1] -0.2807578
cor(xar1, par1f, use = 'compl')
## [1] 0.75465
cor(xar1, par1f2, use = 'compl')
## [1] 0.7606652
cor(xar1, par1f3, use = 'compl')
## [1] 0.6945875
# calc wins
wrand <- wins(xrand, prand, 11:n)
wtrend <- wins(xtrend, ptrend, 11:n)
war1 <- wins(xar1, par1, 11:n)
war1f <- wins(xar1, par1f, 11:n)
war1f2 <- wins(xar1, par1f2, 11:n)
war1f3 <- wins(xar1, par1f3, 11:n)
matplot(cbind(xrand, prand), type = 'l', main = 'random')
matplot(cbind(xtrend, ptrend), type = 'l', main = 'trend')
matplot(cbind(xar1, par1, par1f, par1f2, par1f3), type = 'l', main = 'AR1')
legend('bottomleft', c('ar1', 'random', 'auto.arima', 'thetaf', 'opp sign'), lty = 1, col = 1:5)
# plot cum wins
plot(cumsum(wrand), type = 'l')
plot(cumsum(wtrend), type = 'l')
matplot(cbind(cumsum(war1), cumsum(war1f), cumsum(war1f2), cumsum(war1f3)), type = 'l')
legend('bottomleft', c('random', 'auto.arima', 'thetaf', 'opp sign'), lty = 1, col = 1:4)
# sum wins
sum(wrand)
## [1] 36.48024
sum(wtrend)
## [1] 34.05566
sum(war1)
## [1] 7.883858
sum(war1f)
## [1] 6.148303
sum(war1f2)
## [1] 4.549689
sum(war1f3)
## [1] 4.27178
# what is structure of random and trended series
summary(auto.arima(xrand))
## Series: xrand
## ARIMA(1,0,0) with non-zero mean
##
## Coefficients:
## ar1 intercept
## -0.2674 10.0065
## s.e. 0.0966 0.0774
##
## sigma^2 estimated as 0.9769: log likelihood=-139.75
## AIC=285.51 AICc=285.76 BIC=293.32
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.001174902 0.978472 0.7524711 -0.9805923 7.63519 0.5873766
## ACF1
## Training set -0.001030692
summary(auto.arima(xtrend))
## Series: xtrend
## ARIMA(5,1,0) with drift
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 drift
## -0.9016 -0.8993 -0.7523 -0.3595 -0.2340 0.1997
## s.e. 0.0984 0.1295 0.1396 0.1306 0.0993 0.0267
##
## sigma^2 estimated as 1.248: log likelihood=-149.15
## AIC=312.31 AICc=313.54 BIC=330.47
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.005208327 1.077424 0.854348 -12.84862 30.82583 0.7129948
## ACF1
## Training set -0.03173645
Note that all series have mean of 10, so wins are kind of comparable. Using uncorrelated data for random and trended data, gives positive returns.
For an AR(1) the random process gives the best returns, 3 forecast methods give worse returns.
# now try with real data
library(quantmod)
## Loading required package: xts
## Loading required package: TTR
## Version 0.4-0 included new data defaults. See ?getSymbols.
set.seed(100)
getSymbols('DIS')
## As of 0.4-0, 'getSymbols' uses env=parent.frame() and
## auto.assign=TRUE by default.
##
## This behavior will be phased out in 0.5-0 when the call will
## default to use auto.assign=FALSE. getOption("getSymbols.env") and
## getOptions("getSymbols.auto.assign") are now checked for alternate defaults
##
## This message is shown once per session and may be disabled by setting
## options("getSymbols.warning4.0"=FALSE). See ?getSymbols for more details.
## [1] "DIS"
plot(DIS)
## Warning in plot.xts(DIS): only the univariate series will be plotted
x2 <- as.data.frame(DIS)[1:n, 4]
summary(auto.arima(x2))
## Series: x2
## ARIMA(1,1,1)
##
## Coefficients:
## ar1 ma1
## 0.5898 -0.8354
## s.e. 0.1436 0.0941
##
## sigma^2 estimated as 0.1415: log likelihood=-42.8
## AIC=91.6 AICc=91.85 BIC=99.38
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.0445712 0.3704807 0.2589162 0.1177053 0.7435187 0.9297317
## ACF1
## Training set 0.005747877
# a unrelated process with same structure
parma11 <- mean(x2) + arima.sim(list(order = c(1, 1, 1), ar = .59, ma = -.84), n, sd = sd(x2))[1:n]
matplot(cbind(x2, parma11), type = 'l')
legend('bottomleft', c('DIS', 'ARIMA(1,1,1)'), lty = 1, col = 1:2)
px2f <- sapply(11:n, function(i) {
# 1:10 -> 11
forecast(auto.arima(x2[1:(i - 1)]), 1)$mean
})
px2f <- c(rep(NA, 10), px2f)
px2f2 <- sapply(11:n, function(i) {
# 1:10 -> 11
thetaf(x2[1:(i - 1)], 1)$mean
})
px2f2 <- c(rep(NA, 10), px2f2)
px2f3 <- sapply(11:n, function(i) {
# oppose the last change
x2[i - 1] - sign(x2[i - 1] - x2[i - 2])
})
px2f3 <- c(rep(NA, 10), px2f3)
cor(x2, parma11, use = 'compl')
## [1] -0.2755307
cor(x2, px2f, use = 'compl')
## [1] 0.8332456
cor(x2, px2f2, use = 'compl')
## [1] 0.8296683
cor(x2, px2f3, use = 'compl')
## [1] 0.4018015
matplot(cbind(x2, parma11, px2f, px2f2, px2f3), type = 'l')
legend('bottomleft', c('actual', 'random', 'auto.arima', 'thetaf', 'opp sign'), lty = 1, col = 1:5)
wx2 <- wins(x2, parma11, 11:n)
wx2f <- wins(x2, px2f, 11:n)
wx2f2 <- wins(x2, px2f2, 11:n)
wx2f3 <- wins(x2, px2f3, 11:n)
matplot(cbind(cumsum(wx2), cumsum(wx2f), cumsum(wx2f2), cumsum(wx2f3)), type = 'l')
legend('bottomleft', c('random', 'auto.arima', 'thetaf', 'opp sign'), lty = 1, col = 1:4)
sum(wx2)
## [1] 3.00999
sum(wx2f)
## [1] 2.669996
sum(wx2f2)
## [1] 0.079993
sum(wx2f3)
## [1] -0.310013
The random process and the Arima forecast have positive returns. Sign forecast has negative returns.