# Time Series Analysis Laboratory with R
# Prof. Lea Petrella
# Faculty of Economics
# Department of Methods and Models for Economics, Territory and Finance
# Sapienza University of Rome
# a.y. 2019-2020

#####################################
# Useful materials and links
# R: https://www.r-project.org
# RStudio: https://www.rstudio.com/home/
# Manual: The Art of R Programming
# Repositories & Code: GitHub (https://github.com)
#####################################

########################################
# Lesson 4: R Laboratory (Part 2)
########################################

##### THE LINEAR REGRESSION MODEL #####

# We use a well-known dataset, already seen in class: "AirPassengers"
Y <- AirPassengers

# First, an exploratory analysis of the data
Y
sum(is.na(Y))  # number of missing observations
frequency(Y)   # sampling frequency of the series (12 = monthly data)
summary(Y)     # numerical summary of the series

par(mfrow = c(1, 1))
plot(Y,
     xlab = "Date",
     ylab = "Passenger numbers (1000's)",
     main = "Air Passenger numbers from 1949 to 1961")
# What do we observe?

# Decompose the series into the seasonal and trend components
additive.dec <- decompose(Y, type = c("additive"))  # Y(t) = T(t) + S(t) + e(t)
plot(additive.dec)  # results of the additive decomposition

multi.dec <- decompose(Y, type = c("multiplicative"))  # Y(t) = T(t) * S(t) * e(t)
plot(multi.dec)  # results of the multiplicative decomposition

# Plot the correlogram of Y, that is, the sample autocorrelation function
par(mfrow = c(2, 1))
acf(Y)   # indicates the amplitude and duration of the process memory
pacf(Y)  # autocorrelation between two instants, holding the variables in between fixed

# What about the ACF and PACF of a White Noise?
# ACF and PACF of a simulated White Noise, needed to judge whether the
# observed series presents autocorrelation or not.
WN <- rnorm(length(Y))  # fixed: WN was used here before ever being defined
acf(WN)
pacf(WN)  # fixed: the function is 'pacf'; 'PACF' does not exist in R

# We regress the series on seasonal dummies and a linear trend
t <- 1:length(Y)                      # the linear trend
s <- kronecker(rep(1, 12), diag(12))  # the seasonal dummies (one per month)

# Fit the linear models
reg1 <- lm(Y ~ t)  # regression with trend only
summary(reg1)
reg <- lm(Y ~ s + t - 1)  # trend and seasonality (no intercept: 12 dummies)
summary(reg)

par(mfrow = c(2, 1))
res1 <- reg1$residuals  # residuals of the trend-only regression
plot.ts(res1)           # graphical representation
res <- reg$residuals    # residuals of the trend + seasonality regression
plot.ts(res)            # graphical representation
# Are they distributed as a Gaussian r.v.? Are they homoskedastic?

standard.res <- (res - mean(res)) / sd(res)  # standardized residuals
# alternatively, you can use the 'scale' command

# Test the hypothesis of normality for the residuals.
# Graphically, we expect the residuals to behave like a Gaussian r.v.
par(mfrow = c(1, 1))
plot(standard.res, main = "Standardized residue diagram")  # plot
hist(standard.res, nclass = 20, freq = F)                  # histogram
curve(dnorm(x, mean = 0, sd = 1), col = "red", add = T)
qqnorm(standard.res)  # QQ-plot
abline(0, 1, col = "red")

# Test normality with the Jarque-Bera test statistic.
# H0: the residuals are distributed as a Gaussian r.v.
library(tseries)  # provides jarque.bera.test (was called without loading it)
jarque.bera.test(standard.res)

# Test normality with an alternative test
# H0: the residuals are distributed as a Gaussian r.v.
shapiro.test(standard.res)  # Shapiro-Wilk normality test (same H0: Gaussian)

# Test the homoskedasticity hypothesis.
# Graphically, we expect the variance of the residuals not to vary over time
plot(standard.res, main = "Diagram of standardized residues")

# We use the Breusch-Pagan test
# H0: residuals are homoskedastic
library(lmtest)
bptest(reg)

# Test residual autocorrelation.
# Visually, draw each residual (ordinate) against the previous one (abscissa);
# the plot should not reveal any obvious pattern
n <- length(standard.res)
plot(standard.res[-n], standard.res[-1], pch = 16)

# Are they similar to a White Noise?
WN <- rnorm(n)  # fixed: 'RNorm' does not exist, the function is 'rnorm'
lines(WN[-n], WN[-1], type = "p", col = "red", lwd = 2)

# An alternative way is to look at the ACF of the residuals
par(mfrow = c(2, 1))
acf(standard.res)  # makes us think of an AR(2) process with complex roots
acf(WN)

# Ljung-Box and Box-Pierce statistics (H0: no autocorrelation up to lag k)
k <- 12
Box.test(standard.res, lag = k, type = "Ljung-Box")   # Ljung-Box test
Box.test(standard.res, lag = k, type = "Box-Pierce")  # Box-Pierce test

# Alternatively, it is possible to run the Durbin-Watson test
dwtest(reg)  # NB: only valid for order-1 autocorrelation (at lag 1)

# *********************************************************************************
# Transform the data to remove the trend and seasonality components

# Apply the logarithm function to reduce the variability of the data
logY <- log(Y)
par(mfrow = c(2, 1))
plot(Y, main = "Air")
plot(logY, main = "log")

# Compare the ACF of the log series with the ACF of a WN process
acf(logY, lag.max = 30)
acf(WN, lag.max = 30)

# Calculate the 12th-order differences to remove seasonality
d12logY <- diff(logY, lag = 12)
plot(d12logY)
acf(d12logY)

# Then calculate the first-order differences
dd12logY <- diff(d12logY)
acf(dd12logY)

# par(mfrow = c(1, 2))
par(mfrow = c(2, 1))
plot(d12logY, main = "diff 12")
plot(dd12logY, main = "diff(diff12)")

# Another example:
library(quantmod)

# Load data (downloads AAPL prices; requires an internet connection)
getSymbols(Symbols = "AAPL", from = "2018-01-01")
par(mfrow = c(2, 1))
plot(AAPL$AAPL.Close)

# Calculate daily (log-)returns
Return <- diff(log(AAPL$AAPL.Close))  # fixed: original read 'Return diff = (log(...))'
plot(Return)

# Plot the ACF of prices and of returns
acf(AAPL$AAPL.Close)
acf(Return$AAPL.Close[-1])  # drop the leading NA produced by diff()
# Are prices stationary? How do we make them stationary?

# ***********************************************************************************
##### TIME SERIES SIMULATION WITH TREND, AND ACF/PACF VERIFICATION #####

# Choose the parameters of the linear trend
alpha <- 2.0
beta <- 0.3

# Simulate a series of 200 observations
N <- 200
Sim <- numeric(N)  # preallocate instead of growing the vector inside the loop
for (i in seq_len(N)) {
  Sim[i] <- alpha + beta * i + rnorm(1, mean = 0, sd = 2)
}

par(mfrow = c(1, 1))
plot.ts(Sim)                      # graphical analysis
abline(alpha, beta, col = "red")  # the "true" straight line

# Study the autocorrelation and partial autocorrelation functions
par(mfrow = c(2, 1))
acf(Sim)
pacf(Sim)  # fixed: 'PACF' does not exist, the function is 'pacf'
# Does the ACF look like a White Noise process? How can we make it look like one?
# HOMEWORK: estimate a regression model and analyse the residuals