##L05- Linear Mixed Models## library(ggplot2) g <- ggplot(data.frame(x = c(85, 115)), aes(x)) + stat_function(fun = dnorm, args = list(mean = 100, sd = sqrt(20)), lwd = 1.2) + ylab(expression(paste("Probability Density - ", italic(f(y))))) + xlab(expression(paste(italic(y)))) + stat_function(fun = dnorm, args = list(mean = 100, sd = sqrt(20)), xlim = c(89.95,90.05), geom = "area", color = "indianred1", fill = "indianred1", alpha = 1.0) + theme_bw() + theme(axis.text=element_text(size=12), axis.title=element_text(size=14,face="bold"), legend.title=element_text(size=14,face="bold"), legend.text=element_text(size=12)) g #curve in the probability density ## ---- echo = FALSE, out.width = "90%", fig.width = 7, fig.align = "center", message = FALSE, warning = FALSE---- library(MASS) set.seed(4) sim_size <- 500 sim_mean <- 87 sim_var <- 10 y <- rnorm(sim_size, mean = sim_mean, sd = sqrt(sim_var)) mydf <- data.frame(y = y) g <- ggplot(data = mydf) + geom_histogram(aes(y, ..density..), alpha = 0.4, fill = "mediumpurple3", color = "black", binwidth = diff(range(y))/35) + geom_rug(aes(y), color = "mediumpurple4", alpha = 0.4) + theme_bw() + theme(axis.text=element_text(size=12), axis.title=element_text(size=14,face="bold"), legend.title=element_text(size=14,face="bold"), legend.text=element_text(size=12)) + xlab(expression(bold(italic(y)))) + ylab("Probability Density") + coord_cartesian(xlim = c(sim_mean - 5*sqrt(sim_var), sim_mean + 5*sqrt(sim_var)), ylim = c(0, 0.25)) g ## ---- echo = FALSE, out.width = "90%", fig.width = 7, fig.align = "center", message = FALSE, warning = FALSE---- g <- ggplot(data = mydf) + geom_histogram(aes(y, ..density..), alpha = 0.4, fill = "mediumpurple3", color = "black", binwidth = diff(range(y))/35) + stat_function(fun = dnorm, lwd = 1.2, xlim = c(sim_mean - 5*sqrt(sim_var), sim_mean + 5*sqrt(sim_var)), args = list(mean = mean(y)*0.9, sd = sd(y)*0.55)) + geom_rug(aes(y), color = "mediumpurple4", alpha = 0.4) + theme_bw() + theme(axis.text=element_text(size=12), axis.title=element_text(size=14,face="bold"), legend.title=element_text(size=14,face="bold"), legend.text=element_text(size=12)) + xlab(expression(bold(italic(y)))) + ylab("Probability Density") + coord_cartesian(xlim = c(sim_mean - 5*sqrt(sim_var), sim_mean + 5*sqrt(sim_var)), ylim = c(0, 0.25)) g ## ---- echo = FALSE, out.width = "90%", fig.width = 7, fig.align = "center", message = FALSE, warning = FALSE---- g <- ggplot(data = mydf) + geom_histogram(aes(y, ..density..), alpha = 0.4, fill = "mediumpurple3", color = "black", binwidth = diff(range(y))/35) + stat_function(fun = dnorm, lwd = 1.2, xlim = c(sim_mean - 5*sqrt(sim_var), sim_mean + 5*sqrt(sim_var)), args = list(mean = mean(y)*1.08, sd = sd(y)*2)) + geom_rug(aes(y), color = "mediumpurple4", alpha = 0.4) + theme_bw() + theme(axis.text=element_text(size=12), axis.title=element_text(size=14,face="bold"), legend.title=element_text(size=14,face="bold"), legend.text=element_text(size=12)) + xlab(expression(bold(italic(y)))) + ylab("Probability Density") + coord_cartesian(xlim = c(sim_mean - 5*sqrt(sim_var), sim_mean + 5*sqrt(sim_var)), ylim = c(0, 0.25)) g ## ---- echo = FALSE, out.width = "90%", fig.width = 7, fig.align = "center", message = FALSE, warning = FALSE---- myfit <- fitdistr(y, "normal") g <- ggplot(data = mydf) + geom_histogram(aes(y, ..density..), alpha = 0.4, fill = "mediumpurple3", color = "black", binwidth = diff(range(y))/35) + stat_function(fun = dnorm, lwd = 1.2, xlim = c(sim_mean - 5*sqrt(sim_var), sim_mean + 5*sqrt(sim_var)), args = list(mean = myfit$estimate[1], sd = myfit$estimate[2])) + geom_rug(aes(y), color = "mediumpurple4", alpha = 0.4) + theme_bw() + theme(axis.text=element_text(size=12), axis.title=element_text(size=14,face="bold"), legend.title=element_text(size=14,face="bold"), legend.text=element_text(size=12)) + xlab(expression(bold(italic(y)))) + ylab("Probability Density") + coord_cartesian(xlim = c(sim_mean - 5*sqrt(sim_var), sim_mean + 5*sqrt(sim_var)), ylim = c(0, 0.25)) g #Lets Practice 01 #Generate a data sample from a normal distribution: dados_amostra <- rnorm(100, mean = 5, sd = 2) dados_amostra # Define the likelihood function for a normal distribution likelihood_normal <- function(params, dados) { mu <- params[1] # Parâmetro da média sigma <- params[2] # Parâmetro do desvio padrão # Calculate the probability density function of the normal distribution log_likelihood <- sum(dnorm(dados, mean = mu, sd = sigma, log = TRUE)) return(-log_likelihood) } # Use the “optim function” to find the mu and sigma values that #maximize the likelihood ajuste <- optim(c(mu = 1, sigma = 1), fn = likelihood_normal, dados = dados_amostra) # Result estimate_mu <- ajuste$par[1] estimate_sigma <- ajuste$par[2] estimate_mu estimate_sigma #Buiding the Model #Example: #Let 𝑦_(𝑖,𝑗,𝑘) denote the observed yield of the 𝑖th genotype (𝑖=1,2,3,…,20) #grown in the 𝑗th nitrogen level (𝑗=1,2) ,for replicate 𝑘 (𝑘=1,2) set.seed(4) n_geno <- 20 n_rep <- 2 mean <- 1.7 fixed_N <- 0.4 var_high_N <- 0.08 var_low_N <- 0.03 var_res <- 0.005 geno_names <- paste("G", 1:n_geno, sep = "") genotype <- factor(rep(geno_names, each = 2 * n_rep)) N_level <- factor(rep(rep(c("Low", "High"), each = n_rep), n_geno), levels = c("Low", "High")) y_fixed <- mean + (as.numeric(N_level) - 1) * fixed_N b_low <- rnorm(n_geno, mean = 0, sd = sqrt(var_low_N)) b_high <- rnorm(n_geno, mean = 0, sd = sqrt(var_high_N)) y_random <- rep(c(rbind(b_low, b_high)), each = n_rep) y_error <- rnorm(length(y_fixed), mean = 0, sd = sqrt(var_res)) y <- y_fixed + y_random + y_error dat <- data.frame(genotype = genotype, N = N_level, yield = y) library(ggplot2) g <- ggplot(dat, aes(x = N, y = yield)) + geom_boxplot() + ylab("Yield") + xlab("Nitrogen Level") + theme_bw() + theme(axis.text=element_text(size=12), axis.title=element_text(size=14,face="bold"), legend.title=element_text(size=14,face="bold"), legend.text=element_text(size=12)) g fm1 <- lm(yield ~ N, data = dat) anova(fm1) ## ---- out.width = "85%", fig.width = 7, fig.align = "center", echo=FALSE---- g <- ggplot(dat, aes(x = genotype, y = yield)) + geom_boxplot() + ylab("Yield") + xlab("Genotype") + theme_bw() + theme(axis.text=element_text(size=10), axis.title=element_text(size=14,face="bold"), legend.title=element_text(size=14,face="bold"), legend.text=element_text(size=12)) g #anova fm2 <- lm(yield ~ N + genotype, data = dat) anova(fm2) ##Buiding the Model (Mixed Models) library(nlme) fm3 <- lme(yield ~ N, random = list(genotype = ~ 1), data = dat) fm3 getVarCov(fm3) fm3$sigma^2 anova(fm3) fm3$coefficients$random$genotype #Information Criteria fm1 <- gls(yield ~ N, method="REML", dat) anova(fm1, fm3) # Lets Practice 02 #data install.packages("agridat") library(agridat) data(package = "agridat") data("name of dataset") fusarium <- read.csv("C:\\Users\\Usuario\\Desktop\\ESALQ\\Disciplinas\\2023\\Pós Graduação\\LGN5822_Biometria aplicada à Genética\\Michele\\Lectures\\Lecture05 - Linear Mixed Models\\Practice\\Fusarium.csv", sep = ",") fusarium library(ggplot2) g <- ggplot(fusarium, aes(x = strain, y = severity)) + geom_boxplot(aes()) g #Which models can we fit? #Traditionals models #mean fm1 <- lm(severity ~ 1, data = fusarium) anova(fm1) #Year fm2 <- lm(severity ~ year, data = fusarium) anova(fm2) #Year+strain fm3 <- lm(severity ~ year + strain, data = fusarium) anova(fm3) #Year*strain fm4 <- lm(severity ~ year*strain, data = fusarium) anova(fm4) #anova together anova(fm1, fm2, fm3, fm4) #Mixed Models library(nlme) fm5 <- lme(severity ~ year + strain, random = list(genotype = pdIdent(~ 1)), data=fusarium) fm6 <- lme(severity ~ year + strain, random = list(genotype = pdIdent(~ strain - 1)), data=fusarium) fm7 <- lme(severity ~ year + strain, random = list(genotype = pdDiag(~ strain - 1)), data=fusarium) anova(fm5, fm6, fm7) fm5 fm6 fm7 # or fm8 <- lme(severity ~ year*strain, random = list(genotype = pdIdent(~ 1)), data=fusarium) fm9 <- lme(severity ~ year*strain, random = list(genotype = pdIdent(~ strain - 1)), data=fusarium) fm10 <- lme(severity ~ year*strain, random = list(genotype = pdDiag(~ strain - 1)), data=fusarium) anova(fm8, fm9, fm10) fm8 fm9 fm_full <- lme(severity ~ year*strain, random = list(genotype = pdDiag(~ strain - 1)), data=fusarium) fm_red <- lme(severity ~ year + strain, random = list(genotype = pdDiag(~ strain - 1)), data=fusarium) anova(fm_red, fm_full) anova(fm_full) getVarCov(fm6) fm6$sigma^2 fm6$coefficients$random$genotype