# Time Series Analysis Laboratory with R
# Prof. Lea Petrella
# Faculty of Economics
# Department of Methods and Models for Economics, Territory and Finance
# Sapienza University of Rome
# a.y. 2019-2020

#####################################
#####################################
# Lesson 1: Introduction to R (Part 1)
#####################################

# Index:
#
#  1) what is R?
#  2) pros and cons
#  3) first steps: set the working directory
#  3) download and installation of R and RStudio
#  4) minimum processing: R as calculator
#  5) ask R for help
#  6) assigning a name to an object
#  7) how to verify the nature of an R object
#  8) vectors
#  9) path to search objects in R / installation and package loading
# 10) matrices
# 11) operations between scalars, vectors and matrices

#####################################
# 1) What is R?
#####################################

# R is a language and environment for statistical computing and graphics,
# that is, an integrated environment that allows you to:
# - perform calculations;
# - process data;
# - describe even very complex statistical models;
# - graphically represent the data.
# R is also:
# - an object-oriented language, that is, made up of objects that
#   interact with each other through functions;
# - case-sensitive.

#####################################
# 2) Pros and cons
#####################################

# ADVANTAGES:
# - it is a free and open-source software environment, which means that more
#   than one person can work on the same code in order to improve it.
# DISADVANTAGES:
# - it is not easy to use;
# - problems with large datasets.

#####################################
# 3) how to download and install R
#####################################
#
# 1. go to the website: http://www.r-project.org/
# 2. click on CRAN from the Download menu
# 3. select a mirror
# 4. select your operating system
# 5. download or open the executable file
# 6.
#    proceed with the installation
#

#####################################
# 3) how to download and install RStudio (after installing R)
#####################################
# 1. access the site https://www.rstudio.com/products/rstudio/download/
# 2. download the open source version of RStudio suitable for your operating
#    system
# 3. install RStudio via the downloaded executable file

#####################################
# 3) how to set the working directory
#####################################

# getwd()               # shows the folder currently set as working directory
# setwd("C:/........")  # changes the working directory (pay attention to the
#                       # use of "/")
# In RStudio: Session -> Set Working Directory

#####################################
# 4) minimum processing: R as calculator
#####################################

# let's start with the minimum processing, that is, the possibility of using
# R as a calculator
2 + 2
4 - 3
2 * 4
9 / 3

# multiple operations on the same line of code are also possible
2 + 2; 4 - 1; 3 * 5; 14 / 3

#####################################
# 5) ask R for help
#####################################

# It is possible to ask for help in different ways

###### 1. Through a keyword
# help.search("Logarithm")
# (watch out for the syntax with the quotes! R is case-sensitive, which means
# that using capital letters is not the same as using small letters)

###### 2. Help for a specific command
?log
log(100, 10) # logarithm of 100 in base 10

# how many other numeric operations are there, and with what syntax?
# it might be useful to take a look at the "See Also" section
help("Arithmetic")
# alternatively
?arithmetic ## CASE-SENSITIVE!
?Arithmetic

2^3       # exponentiation
8 %% 5    # remainder of the division (modulo)
8 %/% 3   # integer quotient
sqrt(4)
4^(1 / 2) # square root; parentheses are required: 4^1/2 means (4^1) / 2
8^(1 / 3) # nth root (here the cube root)

#####################################
# 6) assigning a name to an object
#####################################

# How to assign a numeric scalar value to the object "a"
a = 3
# but also
b <- 2
1 -> c
print(a)
print(b)
print(c)
# alternatively
b
c

# operations between objects
c = a + b
c
# what happened to c = 1?
# c is now a + b = 5: by overwriting we have lost what we had before!
# don't assign the same name to a different object, as R overwrites the old
# value with the new one

# how to see the objects we have "created"?
ls()
# In RStudio, the Environment window summarizes all the objects in the
# workspace

# how to remove an object?
rm(a)
ls()
# to remove everything:
rm(list = ls())
# in R there is no "undo" key!
# to restore the deleted objects it is necessary to recreate them
a <- 3
b <- 2
c <- 1
ls()

#####################################
# 7) how to verify the nature of an R object
#####################################

# SCALAR objects:
# [1] numerical scalar elements: integer, real or complex
a <- 10
b <- 15.53
complex <- 3.51 + 1.2i
# [2] strings of characters
d <- "Hello"
# [3] logical scalar elements
f <- TRUE
g <- FALSE
# T and F are predefined shortcuts for TRUE and FALSE; they are ordinary
# objects that can be overwritten, so the full names are safer
h <- F
i <- T

0 / 4
4 / 0
0 / 0 # NaN means Not a Number: an indeterminate form
is.na(0 / 0)

#####################################
# 8) vectors
#####################################

vector1 <- c(1, 2, 3)
vector2 <- c("a", "b", "c")
vector3 <- c(f, g, h, i)
empty <- c()

# operations on "numeric" vectors
length(vector1) # length of a vector
sum(vector1)    # sum of the elements of a vector
prod(vector1)   # product of the elements of a vector
min(vector1)    # minimum of the elements of a vector
max(vector1)    # maximum of the elements of a vector

vector1 * vector1
vector1 * 5
vector1 * c(1, 2, 3)
# the next two lines multiply vectors whose lengths are not multiples of each
# other: the shorter one is recycled and R prints a
# Warning message:
# it's not a mistake ... it just warns you that something "strange" has
# happened
vector1 * c(5, 2)
vector1 * c(1, 2, 1, 2, 1)

# NOTE: the vectors that have been created are column vectors (even if it
# doesn't seem ...)
t(vector1)             # transpose of a vector
t(vector1) %*% vector1 # scalar product
# ops! the scalar product can only be calculated between vectors of the same
# length; try() lets the script continue after the (intended) error when it
# is run all at once
try(vector1 %*% c(2, 3))

# BMI EXAMPLE
height <- c(1.75, 1.80, 1.65, 1.90, 1.80, 1.71)
weight <- c(60, 72, 57, 90, 82, 72)
bmi <- weight / height^2 # Body Mass Index

# labels can be associated with elements of a vector
height
names(height)
names <- c("Unit 1", "Unit 2", "Unit 3", "Unit 4", "Unit 5", "Unit 6")
# a smarter way?
# Note: R works with recycling logic: "Unit" is repeated for every index
names <- paste("Unit", seq_along(height))
names(height) <- names
height
# what if I wanted to remove the labels?
names(height) <- NULL

# seq() function
help(seq)
v1 <- seq(1, 10)
v1
# alternatively ...
vv1 <- 1:10
vv1
v2 <- seq(0, 9.9, by = 0.7)
v2
# alternatively ...
vv2 <- seq(0, 9.9, 0.7)
vv2
v3 <- seq(0, 10, length.out = 21) # 21 equally spaced values from 0 to 10
v3

# rep() function
help(rep)
v4 <- rep(10, 100)
v4
v5 <- rep(c(1, 2), 10)
v5
vv5 <- rep(c(1, 2), length.out = 9)
v6 <- rep(a, 3)
v7 <- rep(vector2, 4)
v8 <- rep(c(0, 1), c(2, 8))
v8

# sum() function
# Adds the elements of a vector and returns its value
sum(weight)
sum(height)

# How to calculate the average height and the average weight
mean(height)

# and the variance?
var(height) # The var() function calculates the sample variance
# How can we calculate the population variance?

# cbind() function
# binds vectors together as columns
cbind(height, weight)
cbind(height, rep(1, 3)) # OPS: the shorter vector is silently recycled

# rbind() function
# binds vectors together as rows
rbind(height, weight)
rbind(weight, seq(1, 12, 1.2))

# Missing values?
height2 <- c(1.75, 1.80, 1.65, 1.90, 1.80, NA, 1.75)
weight2 <- c(60, 72, 57, 90, 82, NA, 69)
mean(height2) # ops !!!
var(height2)
# try with the option na.rm
mean(height2, na.rm = TRUE)
var(height2, na.rm = TRUE)

# linear correlation
cor(height2, weight2) # ops !! the result is NA because of the missing values
# na.rm is not an argument of cor() -> help(cor) -> use = "complete.obs"
cor(height2, weight2, use = "complete.obs")

# extraction of a subset of data
# height2[1:2]
# help("subset")

#####################################
# 9) path to search objects in R / installation and package loading
#####################################

# to see the stored objects
ls() # in RStudio there is a window summarizing the objects.
search() # Workspace and list of currently loaded packages
# ".GlobalEnv" contains the objects created so far

# How to make the functions of a package available:
# Two steps:
# 1. Install (one time)                   ---> install.packages command
# 2. Load (every time that R is opened)   ---> library command

# ... what happens when a package is loaded?
library(MASS)
# in order to install the packages:
install.packages("MASS")
# alternatively:
# Packages & Data / install packages ...
# In RStudio it is possible to load and install packages from the graphical
# interface.

#########################################################################
#
# TIP: Avoid giving objects the names of already defined functions
#      (NO rep, seq, order, c, etc.)
#
#########################################################################

#####################################
# 10) matrices
#####################################

?matrix
# Define a matrix with equal terms
matrix <- matrix(1, nrow = 3, ncol = 2)
matrix
# 5 values for 9 cells: the values are recycled (R prints a warning)
matrix.test <- matrix(c(1, 2, 3, 4, 5), ncol = 3, nrow = 3)
matrix.test
matrix.test1 <- matrix(c(1, 2, 3, 4, 5), ncol = 3)
matrix.test1
matrix1 <- matrix(
  c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20),
  nrow = 5, ncol = 4
)
matrix1
matrix1 <- matrix(1:20, nrow = 5) # ncol fixed! ncol = 4
matrix1
rownames(matrix1)
colnames(matrix1)
colnames(matrix1) <- c("column-1", "column-2", "column-3", "column-4")
# a more "elegant" way to do the same thing
# remember the paste() function
colnames(matrix1) <- paste("column", 1:4, sep = "-")
# ... same thing for the rows
rownames(matrix1) <- paste("row", 1:5, sep = "-")
matrix1

# fill the values by row
# the option byrow = TRUE fills the first row and then goes on to the second,
# and so on
matrix2 <- matrix(1:20, nrow = 5, byrow = TRUE)
rownames(matrix2) <- paste("row", 1:5, sep = "-")
colnames(matrix2) <- paste("column", 1:4, sep = "-")
matrix2
matrix3 <- matrix(1:5, ncol = 2, nrow = 5)
matrix3
# auto-complete! the 5 values are recycled to fill the 8 cells
matrix4 <- matrix(1:5, ncol = 2, nrow = 4)
matrix4
dim(matrix)
nrow(matrix)
ncol(matrix)
length(matrix) # ---> considers the "matrix" object as a vector
?length

# Diagonal matrix
matrix5 <- diag(1, 5)
matrix5
matrix6 <- diag(c(18, 56, 3, 79, 67), 5)
matrix6

#####################################
# 11) operations between scalars, vectors and matrices
#####################################

# product by a scalar
3 * vector1
3 * matrix1
# element by element product
vector1 * vector1
c(1, 2, 3, 4, 5) * matrix1
matrix1 * c(1, 2, 3, 4, 5)

# Transpose a vector / matrix / data.frame
t(vector1) # row vector!
t(matrix1)
# NOTE(review): df1 is not created anywhere in this lesson; a small data
# frame is built here from height2 and weight2 so that the example runs.
# Replace it with the intended data set if there is one.
df1 <- data.frame(height = height2, weight = weight2)
t(df1)

# scalar / matrix product:
# (the two intentionally failing products are wrapped in try() so that the
# script continues after the error when it is run all at once)
vector1 %*% vector1            # [1,3] x [3,1] = [1,1]
t(vector1) %*% vector1         # [1,3] x [3,1] = [1,1]
vector1 %*% t(vector1)         # [3,1] x [1,3] = [3,3]
try(t(vector1) %*% t(vector1)) # [1,3] x [1,3] ---> incompatible dimensions!

c(1, 2, 3, 4, 5) %*% matrix1   # [1,5] x [5,4] = [1,4]
# [5,4] x [5,1] (or [5,4] x [1,5]) ---> OPS!
try(matrix1 %*% c(1, 2, 3, 4, 5))
matrix1 %*% c(1, 2, 3, 4)      # [5,4] x [4,1] = [5,1]
# NOTE: vectors created with the c() function are column vectors.
# When scalar products are computed, the transposition is done automatically

# determinant of a matrix
x <- matrix(0:3, ncol = 2)
x
det(x)

# inversion of a non-singular square matrix
solve(x)
solve(x) %*% x

# how to display the main diagonal of a matrix
diag(x)

# how to calculate the trace of a matrix
sum(diag(x))
# alternatively
library(psych)
tr(x)

# sum by rows
rowSums(x)
# sum by columns
colSums(x)

# we can use the "apply" command to perform simple row and / or column
# functions
# help(apply)
# NOTE(review): df1 is not created in this part of the lesson; if it does not
# exist yet, a small data frame with missing values is built from height2 and
# weight2 so that the example runs. Replace it with the intended data set if
# there is one.
if (!exists("df1")) {
  df1 <- data.frame(height = height2, weight = weight2)
}
apply(df1, 2, mean)
apply(df1, 2, mean, na.rm = TRUE)

# To exit:
q()