##L05- Linear Mixed Models##

library(ggplot2)
g <- ggplot(data.frame(x = c(85, 115)), aes(x)) +
  stat_function(fun = dnorm, args = list(mean = 100, sd = sqrt(20)), lwd = 1.2) +
  ylab(expression(paste("Probability Density - ", italic(f(y))))) +
  xlab(expression(paste(italic(y)))) +
  stat_function(fun = dnorm, args = list(mean = 100, sd = sqrt(20)),
                xlim = c(89.95,90.05), geom = "area",
                color = "indianred1", fill = "indianred1", alpha = 1.0) +
  theme_bw() +
  theme(axis.text=element_text(size=12),
        axis.title=element_text(size=14,face="bold"),
        legend.title=element_text(size=14,face="bold"),
        legend.text=element_text(size=12))
g

#curve in the probability density 

## ---- echo = FALSE, out.width = "90%", fig.width = 7, fig.align = "center", message = FALSE, warning = FALSE----
library(MASS)
set.seed(4)
sim_size <- 500
sim_mean <- 87
sim_var <- 10
y <- rnorm(sim_size, mean = sim_mean, sd = sqrt(sim_var))
mydf <- data.frame(y = y)
g <- ggplot(data = mydf) +
  geom_histogram(aes(y, ..density..), alpha = 0.4,
                 fill = "mediumpurple3", color = "black",
                 binwidth = diff(range(y))/35) +
  geom_rug(aes(y), color = "mediumpurple4", alpha = 0.4) +
  theme_bw() +
  theme(axis.text=element_text(size=12),
        axis.title=element_text(size=14,face="bold"),
        legend.title=element_text(size=14,face="bold"),
        legend.text=element_text(size=12)) +
  xlab(expression(bold(italic(y)))) +
  ylab("Probability Density") +
  coord_cartesian(xlim = c(sim_mean - 5*sqrt(sim_var), sim_mean + 5*sqrt(sim_var)),
                  ylim = c(0, 0.25))
g

## ---- echo = FALSE, out.width = "90%", fig.width = 7, fig.align = "center", message = FALSE, warning = FALSE----
g <- ggplot(data = mydf) +
  geom_histogram(aes(y, ..density..), alpha = 0.4,
                 fill = "mediumpurple3", color = "black",
                 binwidth = diff(range(y))/35) +
  stat_function(fun = dnorm, lwd = 1.2, xlim = c(sim_mean - 5*sqrt(sim_var), sim_mean + 5*sqrt(sim_var)),
                args = list(mean = mean(y)*0.9, sd = sd(y)*0.55)) +
  geom_rug(aes(y), color = "mediumpurple4", alpha = 0.4) +
  theme_bw() +
  theme(axis.text=element_text(size=12),
        axis.title=element_text(size=14,face="bold"),
        legend.title=element_text(size=14,face="bold"),
        legend.text=element_text(size=12)) +
  xlab(expression(bold(italic(y)))) +
  ylab("Probability Density") +
  coord_cartesian(xlim = c(sim_mean - 5*sqrt(sim_var), sim_mean + 5*sqrt(sim_var)),
                  ylim = c(0, 0.25))
g

## ---- echo = FALSE, out.width = "90%", fig.width = 7, fig.align = "center", message = FALSE, warning = FALSE----
g <- ggplot(data = mydf) +
  geom_histogram(aes(y, ..density..), alpha = 0.4,
                 fill = "mediumpurple3", color = "black",
                 binwidth = diff(range(y))/35) +
  stat_function(fun = dnorm, lwd = 1.2, xlim = c(sim_mean - 5*sqrt(sim_var), sim_mean + 5*sqrt(sim_var)),
                args = list(mean = mean(y)*1.08, sd = sd(y)*2)) +
  geom_rug(aes(y), color = "mediumpurple4", alpha = 0.4) +
  theme_bw() +
  theme(axis.text=element_text(size=12),
        axis.title=element_text(size=14,face="bold"),
        legend.title=element_text(size=14,face="bold"),
        legend.text=element_text(size=12)) +
  xlab(expression(bold(italic(y)))) +
  ylab("Probability Density") +
  coord_cartesian(xlim = c(sim_mean - 5*sqrt(sim_var), sim_mean + 5*sqrt(sim_var)),
                  ylim = c(0, 0.25))
g

## ---- echo = FALSE, out.width = "90%", fig.width = 7, fig.align = "center", message = FALSE, warning = FALSE----
myfit <- fitdistr(y, "normal")
g <- ggplot(data = mydf) +
  geom_histogram(aes(y, ..density..), alpha = 0.4,
                 fill = "mediumpurple3", color = "black",
                 binwidth = diff(range(y))/35) +
  stat_function(fun = dnorm, lwd = 1.2, xlim = c(sim_mean - 5*sqrt(sim_var), sim_mean + 5*sqrt(sim_var)),
                args = list(mean = myfit$estimate[1], sd = myfit$estimate[2])) +
  geom_rug(aes(y), color = "mediumpurple4", alpha = 0.4) +
  theme_bw() +
  theme(axis.text=element_text(size=12),
        axis.title=element_text(size=14,face="bold"),
        legend.title=element_text(size=14,face="bold"),
        legend.text=element_text(size=12)) +
  xlab(expression(bold(italic(y)))) +
  ylab("Probability Density") +
  coord_cartesian(xlim = c(sim_mean - 5*sqrt(sim_var), sim_mean + 5*sqrt(sim_var)),
                  ylim = c(0, 0.25))
g

#Lets Practice 01

#Generate a data sample from a normal distribution:

dados_amostra <- rnorm(100, mean = 5, sd = 2)  
dados_amostra

# Define the likelihood function for a normal distribution

likelihood_normal <- function(params, dados) {
  mu <- params[1]  # Parâmetro da média
  sigma <- params[2]  # Parâmetro do desvio padrão
  
# Calculate the probability density function of the normal distribution

  log_likelihood <- sum(dnorm(dados, mean = mu, sd = sigma, log = TRUE))
  
  return(-log_likelihood) 
}

# Use the “optim function” to find the mu and sigma values that 
#maximize the likelihood

ajuste <- optim(c(mu = 1, sigma = 1), fn = likelihood_normal, dados = dados_amostra)

# Result
estimate_mu <- ajuste$par[1]
estimate_sigma <- ajuste$par[2]

estimate_mu
estimate_sigma



#Buiding the Model

#Example:
#Let 𝑦_(𝑖,𝑗,𝑘) denote the observed yield of the 𝑖th genotype (𝑖=1,2,3,…,20) 
#grown in the 𝑗th nitrogen level (𝑗=1,2) ,for replicate 𝑘 (𝑘=1,2)

set.seed(4)
n_geno <- 20
n_rep <- 2
mean <- 1.7
fixed_N <- 0.4
var_high_N <- 0.08
var_low_N <- 0.03
var_res <- 0.005
geno_names <- paste("G", 1:n_geno, sep = "")
genotype <- factor(rep(geno_names, each = 2 * n_rep))
N_level <- factor(rep(rep(c("Low", "High"), each = n_rep), n_geno), levels = c("Low", "High"))
y_fixed <- mean + (as.numeric(N_level) - 1) * fixed_N
b_low <- rnorm(n_geno, mean = 0, sd = sqrt(var_low_N))
b_high <- rnorm(n_geno, mean = 0, sd = sqrt(var_high_N))
y_random <- rep(c(rbind(b_low, b_high)), each = n_rep)
y_error <- rnorm(length(y_fixed), mean = 0, sd = sqrt(var_res))
y <- y_fixed + y_random + y_error
dat <- data.frame(genotype = genotype,
                  N = N_level,
                  yield = y)


library(ggplot2)
g <- ggplot(dat, aes(x = N, y = yield)) +
  geom_boxplot() +
  ylab("Yield") +
  xlab("Nitrogen Level") +
  theme_bw() +
  theme(axis.text=element_text(size=12),
        axis.title=element_text(size=14,face="bold"),
        legend.title=element_text(size=14,face="bold"),
        legend.text=element_text(size=12))
g


fm1 <- lm(yield ~ N, data = dat)
anova(fm1)

## ---- out.width = "85%", fig.width = 7, fig.align = "center", echo=FALSE----
g <- ggplot(dat, aes(x = genotype, y = yield)) +
  geom_boxplot() +
  ylab("Yield") +
  xlab("Genotype") +
  theme_bw() +
  theme(axis.text=element_text(size=10),
        axis.title=element_text(size=14,face="bold"),
        legend.title=element_text(size=14,face="bold"),
        legend.text=element_text(size=12))
g

#anova
fm2 <- lm(yield ~ N + genotype, data = dat)
anova(fm2)

##Buiding the Model (Mixed Models)

library(nlme)
fm3 <- lme(yield ~ N,
           random = list(genotype = ~ 1), data = dat)

fm3

getVarCov(fm3)
fm3$sigma^2

anova(fm3)

fm3$coefficients$random$genotype

#Information Criteria

fm1 <- gls(yield ~ N, method="REML", dat)
anova(fm1, fm3)

# Lets Practice 02

#data 
install.packages("agridat")
library(agridat)
data(package = "agridat")
data("name of dataset")


fusarium <- read.csv("C:\\Users\\Usuario\\Desktop\\ESALQ\\Disciplinas\\2023\\Pós Graduação\\LGN5822_Biometria aplicada à Genética\\Michele\\Lectures\\Lecture05 - Linear Mixed Models\\Practice\\Fusarium.csv", sep = ",") 
fusarium

library(ggplot2)
g <- ggplot(fusarium, aes(x = strain, y = severity)) + geom_boxplot(aes())
g

#Which models can we fit?
#Traditionals models
#mean

fm1 <- lm(severity ~ 1, data = fusarium)
anova(fm1)

#Year

fm2 <- lm(severity ~ year, data = fusarium)
anova(fm2)

#Year+strain
fm3 <- lm(severity ~ year + strain, data = fusarium)
anova(fm3)

#Year*strain
fm4 <- lm(severity ~ year*strain, data = fusarium)
anova(fm4)

#anova together
anova(fm1, fm2, fm3, fm4)

#Mixed Models

library(nlme)

fm5 <- lme(severity ~ year + strain, random = list(genotype = pdIdent(~ 1)), data=fusarium)
fm6 <- lme(severity ~ year + strain, random = list(genotype = pdIdent(~ strain - 1)), data=fusarium)
fm7 <- lme(severity ~ year + strain, random = list(genotype = pdDiag(~ strain - 1)), data=fusarium)
anova(fm5, fm6, fm7)

fm5
fm6
fm7

# or

fm8 <- lme(severity ~ year*strain, random = list(genotype = pdIdent(~ 1)), data=fusarium)
fm9 <- lme(severity ~ year*strain, random = list(genotype = pdIdent(~ strain - 1)), data=fusarium)
fm10 <- lme(severity ~ year*strain, random = list(genotype = pdDiag(~ strain - 1)), data=fusarium)
anova(fm8, fm9, fm10)

fm8
fm9

fm_full <- lme(severity ~ year*strain, random = list(genotype = pdDiag(~ strain - 1)), data=fusarium)
fm_red <- lme(severity ~ year + strain, random = list(genotype = pdDiag(~ strain - 1)), data=fusarium)
anova(fm_red, fm_full)

anova(fm_full)


getVarCov(fm6)
fm6$sigma^2
fm6$coefficients$random$genotype