
Autoregressive Integrated Moving Average (ARIMA)

YIK LUN, KEI


allen29@ucla.edu
This paper is a practice exercise based on the book Analysis of Financial Time Series
by Ruey S. Tsay. All R code and comments below belong to the book and its author.

Example 1
setwd("~/Desktop/Chicago")
suppressPackageStartupMessages(require(timeSeries))
data=read.table("m-ibm6708.txt",header=T)
ibm=data[,2]
t<-length(ibm)
acf(ibm)
m1=acf(ibm)

[Figure: ACF of the ibm series]

plot(m1)
m2=pacf(ibm)
plot(m2)

[Figure: PACF of the ibm series]
Box.test(ibm,lag=log(t)) # the choice m = ln(T) is suggested to give better power
##
## Box-Pierce test
##
## data: ibm
## X-squared = 4.2619, df = 6.2186, p-value = 0.6669
Box.test(ibm,lag=log(t),type='Ljung')
##
## Box-Ljung test
##
## data: ibm
## X-squared = 4.3152, df = 6.2186, p-value = 0.6598
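For reference, the Ljung-Box statistic Q(m) = T(T+2) * sum_{k=1..m} rho_k^2/(T-k) can be reproduced from the sample ACF. A minimal sketch using an integer lag m = floor(ln(T)), so the numbers differ slightly from the non-integer lag passed to Box.test() above:
m=floor(log(t)) # integer version of m = ln(T)
rho=acf(ibm,lag.max=m,plot=FALSE)$acf[-1] # sample autocorrelations at lags 1..m
Q=t*(t+2)*sum(rho^2/(t-1:m)) # Ljung-Box Q(m) statistic
Q
1-pchisq(Q,df=m) # approximate p-value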

Example 2: AR(3) Model for Quarterly GNP Growth (Expansion Duration)


data<-read.table("dgnp82.txt")
gnp<-data[,1]
gnp1=ts(gnp,frequency=4,start=c(1947,2))
plot(gnp1,type='b')

[Figure: Time plot of gnp1, the quarterly GNP growth rate]

acf(gnp,lag=12)

[Figure: ACF of gnp (12 lags)]

pacf(gnp,lag.max=12)

[Figure: PACF of gnp (12 lags)]

Box.test(gnp,lag=10,type='Ljung') # Compute Q(10) statistics


##
## Box-Ljung test
##
## data: gnp
## X-squared = 43.234, df = 10, p-value = 4.515e-06
m1=ar(gnp,method="mle") # Find the AR order
m1$aic # AIC values relative to the minimum, so all are >= 0
##          0          1          2          3          4          5          6 
## 27.8466897  2.7416324  1.6032416  0.0000000  0.3027852  2.2426608  4.0520840 
##          7          8          9         10         11         12 
##  6.0254750  5.9046676  7.5718635  7.8953337  9.6788727  7.1975452 
m1$order #AR(3) is selected based on AIC
## [1] 3
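As a cross-check (a sketch, not from the book), the same order can be selected by fitting AR(p) models with arima() and comparing their AIC values directly:
aic.p=sapply(0:6,function(p) arima(gnp,order=c(p,0,0))$aic) # AIC of AR(0)..AR(6)
names(aic.p)=0:6
aic.p
which.min(aic.p)-1 # position minus 1 = AR order with the smallest AIC, which agrees with AR(3) above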

plot(m1$resid,type='l')

[Figure: Time plot of m1$resid, the residuals of the fitted AR(3) model]

Box.test(m1$resid,lag=10,type='Ljung')
##
## Box-Ljung test
##
## data: m1$resid
## X-squared = 7.0808, df = 10, p-value = 0.7178
m2=arima(gnp,order=c(3,0,0)) # Estimation of parameters
Box.test(m2$residuals,lag=10,type='Ljung')
##
## Box-Ljung test
##
## data: m2$residuals
## X-squared = 7.0169, df = 10, p-value = 0.7239
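As in Example 3 below, the p-value can also be recomputed with the degrees of freedom reduced by the number of estimated AR coefficients (a sketch):
1-pchisq(7.0169,10-3) # 10 - 3 = 7 degrees of freedom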
plot(m2$residuals,type='l')

[Figure: Time plot of m2$residuals]

tsdiag(m2) # obtain 3 plots of model checking

[Figure: tsdiag(m2) plots: standardized residuals, ACF of residuals, and p-values for the Ljung-Box statistic]

m2 # In R, "intercept" denotes the mean of the series.


## 
## Call:
## arima(x = gnp, order = c(3, 0, 0))
## 
## Coefficients:
##          ar1     ar2      ar3  intercept
##       0.3480  0.1793  -0.1423     0.0077
## s.e.  0.0745  0.0778   0.0745     0.0012
## 
## sigma^2 estimated as 9.427e-05:  log likelihood = 565.84,  aic = -1121.68

# Constant term of the AR(3) model: phi0 = (1 - phi1 - phi2 - phi3) * mean


(1-m2$coef[1]-m2$coef[2]-m2$coef[3])*m2$coef[4]
##         ar1 
## 0.004723112
sqrt(m2$sigma2) # Residual standard error
## [1] 0.009709322
p1=c(1,-m2$coef[1:3]) # Characteristic equation in c(constant,x,x^2,x^3)
roots=polyroot(p1) # Find solutions
roots
## [1]  1.590253+1.063882i -1.920152+0.000000i  1.590253-1.063882i

Mod(roots) # Compute the absolute values of the solutions


## [1] 1.913308 1.920152 1.913308
k=2*pi/acos(1.590253/sqrt((1.590253)^2+(1.063882)^2)) # average business-cycle length k = 2*pi/omega, where cos(omega) = a/sqrt(a^2+b^2) for the complex roots a +/- bi
k
## [1] 10.65638
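The same quantity follows from the argument (angle) of the complex characteristic root. A minimal equivalent sketch; the fitted AR(3) implies an average business cycle of roughly 10.7 quarters, about 2.7 years:
omega=Arg(roots[1]) # angle of the complex root 1.590253+1.063882i
2*pi/abs(omega) # average business-cycle length in quarters, about 10.66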
predict(m2,8) # Prediction 1-step to 8-step ahead.
## $pred
## Time Series:
## Start = 177 
## End = 184 
## Frequency = 1 
## [1] 0.001236254 0.004555519 0.007454906 0.007958518 0.008181442 0.007936845
## [7] 0.007820046 0.007703826
## 
## $se
## Time Series:
## Start = 177 
## End = 184 
## Frequency = 1 
## [1] 0.009709322 0.010280510 0.010686305 0.010688994 0.010689733 0.010694771
## [7] 0.010695511 0.010696190
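To visualize these forecasts, the series can be plotted with the point forecasts and approximate 95% limits; a minimal sketch (not part of the original example):
fore=predict(m2,8)
upper=fore$pred+2*fore$se # approximate 95% upper limit
lower=fore$pred-2*fore$se # approximate 95% lower limit
ts.plot(ts(gnp),fore$pred,upper,lower,gpars=list(col=c(1,2,4,4),lty=c(1,1,2,2)))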

Example 3: AR(3) with the AR(2) Coefficient Fixed to 0


www<-"http://faculty.chicagobooth.edu/ruey.tsay/teaching/fts3/m-ibm3dx2608.txt"
vw=read.table(www,header=T)[,3]
pacf(vw)

[Figure: PACF of the vw series]

m3=arima(vw,order=c(3,0,0))
m3
## 
## Call:
## arima(x = vw, order = c(3, 0, 0))
## 
## Coefficients:
##          ar1      ar2      ar3  intercept
##       0.1158  -0.0187  -0.1042     0.0089
## s.e.  0.0315   0.0317   0.0317     0.0017
## 
## sigma^2 estimated as 0.002875:  log likelihood = 1500.86,  aic = -2991.73

(1-pnorm(abs(m3$coef)/sqrt(diag(m3$var.coef))))*2 # p-value for coefficients


##          ar1          ar2          ar3    intercept 
## 2.372637e-04 5.546234e-01 1.029105e-03 1.145237e-07
(1-m3$coef[1]-m3$coef[2]-m3$coef[3])*m3$coef[4] # Constant term (phi0)
##         ar1 
## 0.009011937
sqrt(m3$sigma2) # Compute standard error of residuals (sigma a)
## [1] 0.0536189
Box.test(m3$residuals,lag=12,type='Ljung')
##
## Box-Ljung test
##
## data: m3$residuals
## X-squared = 16.352, df = 12, p-value = 0.1756
pv=1-pchisq(16.35,9) # p-value using 12 - 3 = 9 degrees of freedom (three AR coefficients estimated)
pv
## [1] 0.05992276
# To fix the AR(2) coef to zero:
m3=arima(vw,order=c(3,0,0),fixed=c(NA,0,NA,NA),transform.pars = FALSE)
# The subcommand fixed is used to fix parameter values,
# where NA denotes estimation and 0 means fixing the parameter to 0.
# The ordering of the parameters can be found using m3$coef.
m3
## 
## Call:
## arima(x = vw, order = c(3, 0, 0), transform.pars = FALSE, fixed = c(NA, 0, NA, 
##     NA))
## 
## Coefficients:
##          ar1  ar2      ar3  intercept
##       0.1136    0  -0.1063     0.0089
## s.e.  0.0313    0   0.0315     0.0017
## 
## sigma^2 estimated as 0.002876:  log likelihood = 1500.69,  aic = -2993.38

(1-pnorm(abs(m3$coef[-2])/sqrt(diag(m3$var.coef))))*2 # p-value
##          ar1          ar3    intercept 
## 2.833035e-04 7.519576e-04 1.745776e-07
(1-m3$coef[1]-m3$coef[2]-m3$coef[3])*m3$coef[4] # Constant term (phi0)
##         ar1 
## 0.008881533
sqrt(m3$sigma2) # Compute residual standard error (sigma a)
## [1] 0.05362832
Box.test(m3$residuals,lag=12,type='Ljung')
##
## Box-Ljung test
##
## data: m3$residuals
## X-squared = 16.828, df = 12, p-value = 0.1562
pv=1-pchisq(16.83,10) # 12 - 2 = 10 degrees of freedom (two AR coefficients estimated)
pv # p-value is higher
## [1] 0.07821131
predict(m3,8)
## $pred
## Time Series:
## Start = 997 
## End = 1004 
## Frequency = 1 
## [1] 0.030956276 0.021454042 0.009034718 0.006617194 0.007352650 0.008756259
## [7] 0.009172633 0.009141748
## 
## $se
## Time Series:
## Start = 997 
## End = 1004 
## Frequency = 1 
## [1] 0.05362832 0.05397307 0.05397750 0.05426946 0.05428470 0.05428514
## [7] 0.05428816 0.05428853


Example 4: ARIMA Models for the Monthly U.S. Unemployment Rate
If we combine differencing with autoregression and a moving average model, we
obtain a non-seasonal ARIMA model, fitted in R as arima(x,order=c(p,d,q),...) with
p = order of the autoregressive part;
d = degree of first differencing involved;
q = order of the moving average part.
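As a minimal sketch on simulated data (not from the book), an ARIMA(1,1,1) fit and forecast look like this:
set.seed(1)
x=cumsum(arima.sim(model=list(ar=0.5,ma=-0.3),n=200)) # integrate an ARMA(1,1) once, so d=1 by construction
fit=arima(x,order=c(1,1,1))
fit
predict(fit,4) # 1- to 4-step-ahead forecasts and standard errors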
suppressPackageStartupMessages(require(quantmod))
options("getSymbols.warning4.0"=FALSE)
getSymbols("UNRATE",src="FRED")
## [1] "UNRATE"
chartSeries(UNRATE)

[Figure: chartSeries(UNRATE), monthly U.S. unemployment rate, Jan 1948 to Jul 2015, last value 5.3]

rate <- as.numeric(UNRATE[,1])


ts.plot(rate)

[Figure: Time plot of the rate series]

acf(rate)

[Figure: ACF of the rate series]
# Differencing can stabilize the mean of a time series, eliminating (or reducing) trend and seasonality.
zt=diff(rate)
t.test(zt)
## 
##  One Sample t-test
## 
## data:  zt
## t = 0.3147, df = 809, p-value = 0.7531
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.01228503  0.01697639
## sample estimates:
##   mean of x 
## 0.002345679

acf(zt)

[Figure: ACF of zt]

pacf(zt)

[Figure: PACF of zt]

m1=ar(zt,lag.max=20,method="mle")
names(m1)
##  [1] "order"        "ar"           "var.pred"     "x.mean"      
##  [5] "aic"          "n.used"       "order.max"    "partialacf"  
##  [9] "resid"        "method"       "series"       "frequency"   
## [13] "call"         "asy.var.coef"

m1$order
## [1] 12
m2=arima(rate,order=c(12,1,0))
m2
## 
## Call:
## arima(x = rate, order = c(12, 1, 0))
## 
## Coefficients:
##          ar1     ar2     ar3     ar4     ar5     ar6      ar7     ar8
##       0.0160  0.2181  0.1474  0.0986  0.1329  0.0031  -0.0356  0.0152
## s.e.  0.0349  0.0350  0.0357  0.0362  0.0363  0.0366   0.0366  0.0364
##          ar9     ar10    ar11     ar12
##       0.0045  -0.0879  0.0266  -0.1293
## s.e.  0.0363   0.0358  0.0351   0.0351
## 
## sigma^2 estimated as 0.03732:  log likelihood = 182,  aic = -338

tsdiag(m2,gof=24)

[Figure: tsdiag(m2,gof=24) plots: standardized residuals, ACF of residuals, and p-values for the Ljung-Box statistic]

which((1-pnorm(abs(m2$coef)/sqrt(diag(m2$var.coef))))*2 > 0.05)


##  ar1  ar6  ar7  ar8  ar9 ar11 
##    1    6    7    8    9   11
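The vector c1 defined below can equivalently be built from this test programmatically (a sketch):
idx=which((1-pnorm(abs(m2$coef)/sqrt(diag(m2$var.coef))))*2 > 0.05) # insignificant positions
c1=rep(NA,length(m2$coef)) # NA = estimate the parameter
c1[idx]=0 # 0 = fix the parameter to zero
c1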

c1=c(0,NA,NA,NA,NA,0,0,0,0,NA,0,NA) ### Remove insignificant parameters


m2a=arima(rate,order=c(12,1,0),fixed=c1,transform.pars = FALSE)
m2a
## 
## Call:
## arima(x = rate, order = c(12, 1, 0), transform.pars = FALSE, fixed = c1)
## 
## Coefficients:
##       ar1     ar2     ar3     ar4     ar5  ar6  ar7  ar8  ar9     ar10
##         0  0.2148  0.1527  0.1008  0.1312    0    0    0    0  -0.0864
## s.e.    0  0.0346  0.0338  0.0341  0.0351    0    0    0    0   0.0345
##       ar11     ar12
##          0  -0.1298
## s.e.     0   0.0340
## 
## sigma^2 estimated as 0.03742:  log likelihood = 180.98,  aic = -347.97

which((1-pnorm(abs(m2$coef[-c(1,6,7,8,9,11)])/sqrt(diag(m2$var.coef))))*2 > 0.05)


## named integer(0)
sqrt(m2a$sigma2) # Compute residual standard error (sigma a)
## [1] 0.1934326
Box.test(m2a$residuals,lag=12,type='Ljung')
##
## Box-Ljung test
##
## data: m2a$residuals
## X-squared = 2.942, df = 12, p-value = 0.9959
pv=1-pchisq(2.942,6) # 12 - 6 = 6 degrees of freedom (six AR coefficients estimated)
pv # the residuals look like white noise.
## [1] 0.8160905
predict(m2a,5)
## $pred
## Time Series:
## Start = 812 
## End = 816 
## Frequency = 1 
## [1] 5.292487 5.276253 5.310830 5.258342 5.305775
## 
## $se
## Time Series:
## Start = 812 
## End = 816 
## Frequency = 1 
## [1] 0.1934326 0.2735550 0.3606186 0.4472223 0.5346188
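Because the unemployment rate is a monthly series, a multiplicative seasonal ARIMA model is also worth entertaining: in arima() the seasonal argument supplies the seasonal (P, D, Q) orders and the period, here (1, 0, 1) with period 12.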

m3=arima(rate,order=c(2,1,1),seasonal=list(order=c(1,0,1),period=12))
m3
## 
## Call:
## arima(x = rate, order = c(2, 1, 1), seasonal = list(order = c(1, 0, 1), period = 12))
## 
## Coefficients:
##          ar1     ar2      ma1    sar1     sma1
##       0.6026  0.2329  -0.6010  0.5507  -0.8186
## s.e.  0.0600  0.0382   0.0552  0.0670   0.0481
## 
## sigma^2 estimated as 0.03571:  log likelihood = 198.48,  aic = -384.96

tsdiag(m3,gof=24)

[Figure: tsdiag(m3,gof=24) plots: standardized residuals, ACF of residuals, and p-values for the Ljung-Box statistic]

predict(m3,4)
## $pred
## Time Series:
## Start = 812 
## End = 815 
## Frequency = 1 
## [1] 5.297543 5.329502 5.340351 5.307965
## 
## $se
## Time Series:
## Start = 812 
## End = 815 
## Frequency = 1 
## [1] 0.1889781 0.2674703 0.3550340 0.4401560

Reference:
Tsay, Ruey S. Analysis of Financial Time Series. Vol. 543. John Wiley & Sons, 2005.

