#### Lecture02 = Introduction to R ####
#
# Teaching script meant to be stepped through interactively. It covers:
# getting help, installing/loading packages, arithmetic and comparison
# operators, vectors, matrices, data frames, base graphics, file import/export
# and a simple linear regression.
#
# NOTE: the install.packages(), swirl(), setwd() and read.table() calls depend
# on network access and on the lecturer's machine-specific paths.

# Getting help with R ----
help()
help(seq)
?mean
help(package = "agricolae")
help(package = "ExpDes")
help(package = "ggplot2")

# help.search()
help.search("histogram")
help.search("dispersão")
help.search("t.test")
help.search("anova")

# Install packages ----
install.packages("BGLR")
install.packages("MASS")
install.packages("lme4")
install.packages(c("ggplot2", "dplyr", "tidyr"))

# Load packages ----
library("ggplot2")
library("lme4")
# devtools is not installed by this script, so attach it only when available
# (library() would stop the script if the package were missing).
if (requireNamespace("devtools", quietly = TRUE)) {
  library("devtools")
}

# swirl package
install.packages("swirl")
library("swirl")
swirl()
# bye(): to exit swirl

# R help on the Internet
# https://search.r-project.org/
# http://www.rseek.org
# Mailing lists
# https://www.r-project.org/mail.html

# R as a calculator ----
## Arithmetic operators
# Sum
1 + 4
2 + 5
# Subtract
6 - 3
8 - 4
# Divide
10 / 2
10 / 3
# Multiplication
2 * 3
4 * 1.2
# Exponential
2^2
2**2
# Square root
sqrt(16)
sqrt(64)
9^0.5 # or sqrt(9)
# Log
log(10)
log(10, base = 10)
log(10, base = 2)
#-----------
## Comparison operators
3 * 2 == 5 # equal to (caution: '==' is different than '=')
4 != 1     # not equal to
10 > 4     # greater than
3 < 5      # less than
10 >= 10   # greater than or equal to
12 <= 10   # less than or equal to
10 != 5    # different than

a <- 5
b <- 3
a == b
a != b
a < b
a > b
a <= b
a >= b
#-----------
# Parentheses are important
4 * 5 + 2 - 3 / 8
4 * (5 + 2) - (3 / 8)
4 * (5 + 2 - 3 / 8)
((4 * 5) + 2 - 3) / 8
#--------------
# Summary statistics ----
d <- c(10, 8, 12, 7, 6, 9, 8, 13, 10, 7)
mean(d)
median(d)
var(d)
sd(d)
#==========
# Simple functions ----
## Logical operators
# Comparison binds tighter than & and |, so these parse as 40 & (5 > 30) and
# 40 | (5 > 30); the non-zero number 40 is coerced to TRUE.
40 & 5 > 30
40 | 5 > 30
1 < 0
1 == 0
1 > 0
p <- TRUE
q <- FALSE
p & q
p | q

# Functions ----
# ?functionname
?log
?seq
?rep

# seq() function
x <- seq(1, 10)
x
seq(from = 0, to = 100, by = 3) # we can write in this way, or seq(0,100,3)

# Make sequences
# Create a sequence of integers from 1 to 10
x <- seq(1, 10)
x
# Create a sequence of integers from 0 to 20, in increments of 2:
y <- seq(0, 20, by = 2)
y
# Create a descending sequence from 10 to 1:
z <- seq(10, 1, by = -1)
z

# rep() function
?rep
rep(x = 1, times = 3) # or rep(1,3)
# Repeat value 3 five times
x <- rep(3, times = 5)
x
# Repeat vector [1, 2, 3] twice:
y <- c(1, 2, 3)
z <- rep(y, times = 2)
z
#=========
# R creating objects ----
# Both '=' and '<-' assign at top level; '<-' is the preferred style.
x = 10
x * 2
x^5
x + 2
# or
x <- 10
x * 2
x^5
x + 2

x <- 5
x * 5
x^5
x + 1

x_name <- "name"
x_name

# To see which type your object is
class(x)
class(x_name)
#=========
# R data types ----
# Vectors
# Numerical vector
x <- c(1, 2, 3, 4, 5)
x
# Character vector (strings)
y <- c("apple", "banana", "orange", "grape")
y
# Logical vector (Boolean)
z <- c(TRUE, FALSE, TRUE, FALSE)
z
# Vector with numeric sequence
x <- seq(1, 10)
x
# Repeating vector
x <- rep(0, times = 5)
x
# Vector with randomly generated values
x <- runif(5)
x

# Vector manipulation
# Two vectors of the same length can be added, subtracted, multiplied or
# divided, giving the result as a vector output.
# Create two vectors
v1 <- c(3, 8, 4, 5, 0, 11)
v2 <- c(4, 11, 0, 8, 1, 2)
# Vector addition
v <- v1 + v2
v
# Vector subtraction
v <- v1 - v2
v
# Vector multiplication
v <- v1 * v2
v
# Vector division (v2 contains a 0, so that position yields Inf)
v <- v1 / v2
v

# Math on vectors
x <- c(10, 20, 40, 80)
x + 10
x * 2
y <- c(0, -10, 5, -20)
x + y

# Vector element sorting
v <- c(3, 8, 4, 5, 0, 11, -9, 304)
# Sort the elements of the vector
v <- sort(v)
v
# Sorting character vectors
v <- c("Red", "Blue", "Yellow", "Violet")
v <- sort(v)
v

# Naming vectors
yield <- c(150, 200, 225)
names(yield) <- c("Piracicaba", "Limeira", "Campinas")
yield
yield["Piracicaba"]

# Vectors (continued). Examples:
x <- c(1.5, 2.1, 2.5, 3.4, 4.3, 6.1) # a vector with numeric values
y <- c("A", "A", "B", "B", "C", "C") # a vector with character values
str(x)
str(y)
sum(x)  # the sum of the vector elements
mean(x) # the mean of the vector elements
var(x)  # the variance of the vector elements
# sum() on a character vector is an error. try() still prints the error
# message but lets the rest of the script run when it is source()d.
try(sum(y))

# To do
# Exercise 1
x <- runif(10)
x
sum(x)
mean(x)
var(x)
# All numbers less than 0.05
x[x < 0.05]
# All numbers greater than or equal to 0.5
x[x >= 0.5]

# Exercise 2
seq(1, 10, 0.1)            # creates a vector from 1 to 10 in steps of 0.1
sample(seq(1, 10, 0.1), 4) # samples 4 numbers from seq(1,10,0.1)
#=========
# Matrices ----
# A matrix is created using the matrix() function
?matrix
# Create a 2x3 matrix from a vector
x <- c(1, 2, 3, 4, 5, 6)
y <- matrix(x, nrow = 2, ncol = 3)
y
matrix(0, 2, 3)

(vals <- 1:6)
(x <- matrix(vals, 2, 3))
dim(x)
# or
(vals <- 1:6)
(y <- matrix(vals, 2, 3, byrow = TRUE))
dim(y)

# Elements are arranged sequentially by row
M <- matrix(c(3:14), nrow = 4, ncol = 3, byrow = TRUE)
M
# Elements are arranged sequentially by column
N <- matrix(c(3:14), nrow = 4, ncol = 3, byrow = FALSE)
N

# Define the column and row names
# (named row_names/col_names so the base functions rownames()/colnames()
# are not masked by data objects)
row_names <- c("row1", "row2", "row3", "row4")
col_names <- c("col1", "col2", "col3")
x <- matrix(
  c(3:14),
  nrow = 4, ncol = 3, byrow = TRUE,
  dimnames = list(row_names, col_names)
)
x

# Accessing elements of a matrix
# Access the element at 3rd column and 1st row
x[1, 3]
# Access the element at 2nd column and 4th row
x[4, 2]
# Access only the 2nd row
x[2, ]
# Access only the 3rd column
x[, 3]

# Mathematical operations with matrices
# Matrix addition & subtraction
# Create two 2x3 matrices
matrix1 <- matrix(c(3, 9, -1, 4, 2, 6), nrow = 2)
matrix1
matrix2 <- matrix(c(5, 2, 0, 9, 3, 4), nrow = 2)
matrix2
# Add the matrices
M <- matrix1 + matrix2
M
# Subtract the matrices
M <- matrix1 - matrix2
M
# Multiply the matrices (element-wise; the matrix product is %*%)
M <- matrix1 * matrix2
M
# Divide the matrices (element-wise; matrix2 contains a 0, giving -Inf there)
M <- matrix1 / matrix2
M

# Matrix algebra
matrix1 <- matrix(c(3, 9, -1, 4), nrow = 2)
matrix1
# Transposed matrix
y <- t(matrix1)
y
# Matrix determinant
y <- det(matrix1)
y
# Inverse of matrix
y <- solve(matrix1)
y

# To do
# Create matrices M and N in R
v1 <- c(5, 9)
v2 <- c(-4, 1)
v3 <- c(8, 0)
M <- rbind(v1, v2, v3)
M
v1 <- c(-4, 1)
v2 <- c(-2, 0)
v3 <- c(0, 5)
N <- rbind(v1, v2, v3)
N
# Get the transpose of M and N
t(M)
t(N)
# Add M and N
result <- M + N
result
# Subtract N from M
result1 <- M - N
result1

### cbind
v1 <- c(5, 9)
v2 <- c(-4, 1)
v3 <- c(8, 0)
M <- cbind(v1, v2, v3)
M
v1 <- c(-4, 1)
v2 <- c(-2, 0)
v3 <- c(0, 5)
N <- cbind(v1, v2, v3)
N
#---------
## Data frame ----
?data.frame
# Create a data frame with three columns
name <- c("Ana", "João", "Maria")
age <- c(25, 30, 28)
score <- c(85, 90, 88)
df <- data.frame(name, age, score)
df

df <- data.frame(
  Ind = c(1:10),
  Yield = c(1.80, 1.77, 1.71, 1.65, 1.66, 1.63, 1.77, 2.58, 2.15, 1.56),
  Resis = c(TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, TRUE, TRUE, FALSE, FALSE)
)
#=========
# Graphic plotting ----
# Scatter plot
?plot
# Data vectors
x <- c(1, 2, 3, 4, 5)
y <- c(3, 5, 7, 8, 10)
# Create the scatter plot
# NOTE(review): xlab/ylab may be swapped (x looks like years, y like yield);
# confirm with the lecture material before changing.
plot(x, y,
  main = "Maize summer", xlab = "yield", ylab = "years",
  col = "blue", pch = 19
)
####
x <- seq(0, 20, 0.5)
y <- x^2
plot(x, y, main = "Scatterplot example")

# Histogram
temperatures <- c(67, 72, 74, 62, 76, 66, 65, 59, 61, 69)
# Histogram of the temperatures vector
result <- hist(temperatures,
  main = "Histogram of Temperature",
  xlab = "Temperature in degrees Fahrenheit"
)

# Boxplots
age_class <- c(21, 22, 24, 18, 19, 27, 22, 22, 23, 21, 23, 21)
summary(age_class)
boxplot(age_class, main = "Boxplot: age_class", col = "Pink")

# Barplots
# Example:
max.temp <- c(22, 27, 26, 24, 23, 26, 28)
# Bars are vertical: the days (names.arg) run along the x axis and the
# temperatures along the y axis, so the axis labels follow that layout.
barplot(max.temp,
  main = "Maximum Temperatures in a Week",
  xlab = "Day",
  ylab = "Degree Celsius",
  names.arg = c("Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"),
  col = "Blue"
)

# Heat maps
# Create an example matrix
matriz <- matrix(rnorm(100), nrow = 10)
# Create a matrix heatmap
heatmap(matriz, col = heat.colors(256), main = "Example of HeatMap")

# Working directory ----
# These paths are specific to the lecturer's machine; adjust before running.
getwd()
setwd("C:/Users/Usuario/Desktop/Rtest")
# or, with escaped backslashes
setwd("C:\\Users\\Usuario\\Desktop\\Rtest")

# File formatting ----
# Importing data into R
# read.table(filename, header = FALSE, sep = "")
# Example
X <- read.table("C:\\Users\\Usuario\\Desktop\\Rtest\\data1.csv",
  header = TRUE, sep = ";"
)
X
# Exporting data from R
# write.table
# Use the same ';' separator as the import and drop row names so the exported
# file can be read back with the read.table() call above.
write.table(X,
  file = "C:\\Users\\Usuario\\Desktop\\Rtest\\result.csv",
  sep = ";", row.names = FALSE
)

# Fitting a regression model ----
?lm
Y <- c(6.5, 5.8, 7.8, 8.1, 10.4, 12.3, 13.1, 17.4, 20.1, 24.5, 25.5, 27.1) # response variable
X <- c(1.4, 1.5, 1.7, 1.9, 2.1, 2.2, 2.4, 3.2, 3.7, 4.2, 4.8, 5.2) # explanatory variable
dados <- data.frame(Y, X)
dados
modelo.regressao <- lm(Y ~ X, data = dados)
summary(modelo.regressao) # to obtain the coefficients of the linear regression line
anova(modelo.regressao)   # ANOVA
plot(Y ~ X, pch = 16, data = dados)
abline(modelo.regressao, col = "Red") # draws the fitted regression line over the data