---
  title: "LGN0313 Melhoramento Genético - Prof. Roberto Fritsche Neto"
author: "Júlia Silva Morosini"
date: "22/06/2018"

## ---- Loading and exploring t4 data---------------------------------------------------------------------------
setwd("~/Documents/Doutorado ESALQ/PAE 2018-1/")
t4 <- read.table("T2.txt", h=T) # t4 representa a turma de sexta-feira, cujo arquivo é o T2.txt
str(t4)
t4$Bloco <- as.factor(t4$Bloco)
str(t4)

library(lattice) # plots (xyplot)
library(gridExtra) # plot layout (grid.arrange)

# Hybrid
p1 = xyplot(NF ~ reorder(Hibrido, NF), data = t4, xlab = "Genotype", ylab = "NF", col = t4$Hibrido, 
       type=c("p", "a"), cex = 1.3, lwd = 3, pch=20)
p2 = xyplot(AP ~ reorder(Hibrido, AP), data = t4, xlab = "Genotype", ylab = "AP", col = t4$Hibrido,
       type=c("p", "a"), cex = 1.3, lwd = 3, pch=20)
grid.arrange(p1,p2, ncol=2)

# Block
p3 = xyplot(NF ~ reorder(Bloco, NF), data = t4, xlab = "Block", ylab = "NF", col = t4$Bloco, 
       type=c("p", "a"), cex = 1.3, lwd = 3, pch=20)
p4 = xyplot(AP ~ reorder(Bloco, AP), data = t4, xlab = "Block", ylab = "AP", col = t4$Bloco, 
       type=c("p", "a"), cex = 1.3, lwd = 3, pch=20)
grid.arrange(p3,p4, ncol=2)

# pehnotypic correlation NF vs AP
library("PerformanceAnalytics")
chart.Correlation(as.matrix(t4[,c(3,4)]), histogram = TRUE, pch = 19)
library("ggplot2")
ggplot(t4, aes(NF, AP, color = Hibrido)) + geom_point(show.legend = T) + xlab("Genotypes") +
  ylab("NAE") + stat_smooth(method = "lm", col = "red")

## ---- ANOVA and h2--------------------------------------------------------------------------------------------

# NF
tapply(t4$NF, t4[,1:2], mean)

fm1.nf <- aov(NF ~ Bloco + Hibrido, data = t4) # another one is lm()
anova(fm1.nf)

(r = length(levels(t4$Bloco))) # blocks

(Vg.nf <- round((anova(fm1.nf)[[3]][2] - anova(fm1.nf)[[3]][3])/r,4))
(Ve.nf <- round(anova(fm1.nf)[[3]][3],4))
(h.nf <- round(Vg.nf / (Vg.nf + Ve.nf/r),4))
(CV.nf <- round(sqrt(anova(fm1.nf)[[3]][3])/mean(t4$NF)*100,4))

# AP
tapply(t4$AP, t4[,1:2], mean)

fm1.ap <- aov(AP ~ Bloco + Hibrido, data = t4)
anova(fm1.ap)

(Vg.ap <- round((anova(fm1.ap)[[3]][2] - anova(fm1.ap)[[3]][3])/r,4))
(Ve.ap <- round(anova(fm1.ap)[[3]][3],4))
(h.ap <- round(Vg.ap / (Vg.ap + Ve.ap/r),4))
(CV.ap <- round(sqrt(anova(fm1.ap)[[3]][3])/mean(t4$AP)*100,4))

rs = data.frame("NF" = c(Vg.nf, Ve.nf, h.nf, CV.nf),
                     "AP" = c(Vg.ap, Ve.ap, h.ap, CV.ap))
rownames(rs) = c("Vg", "Ve", "h2", "CV"); rs

### Faster way to generate this same table:

a = list(anova(fm1.nf), anova(fm1.ap))
rs.f = matrix(NA,4,2)
for (i in 1:2) {
rs.f[1,i] <- Vg <- round((a[[i]][[3]][2] - a[[i]][[3]][3])/r,4)
rs.f[2,i] <- Ve <- round(a[[i]][[3]][3],4)
rs.f[3,i] <- h <- round(Vg / (Vg + Ve /r),4)
  for(j in 1:2){
  rs.f[4,i] <- CV <- round(sqrt(a[[i]][[3]][3])/mean(t4[,2+i])*100,4)
  }
rownames(rs.f) = c("Vg", "Ve", "h2", "CV")
colnames(rs.f) = c("NF", "AP")
}
rs.f

# Prepare data for plotting Interaction hybrid x block
t4b <- rbind(t4,t4)
t4b[,"Trait"] <- c(rep(paste("NF"),20),rep(paste("AP"),20))
t4b[,"Valor"] <- c(t4[1:20,"NF"],t4[1:20,"AP"])
t4b <- t4b[,-c(3,4)]

ggplot(data=t4b, aes(x=Bloco, y=Valor, color = Hibrido)) +
  geom_line(aes(group = Hibrido)) + geom_point() +
  labs(x = "Blocks", y = "Value", color = "Hybrids") +
  #facet_wrap(~Trait, scales="free") +
  facet_grid(Trait ~ ., scales="free") 

## ---- Mean test-----------------------------------------------------------------------------------------------

# a) only for models fitted using aov() 
(tukey.nf <- TukeyHSD(x = fm1.nf, which = "Hibrido", ordered = T, conf.level = 0.95))
(tukey.ap <- TukeyHSD(x = fm1.ap, which = "Hibrido", ordered = T, conf.level = 0.95))

# b) using lm()

#install.packages("emmeans")
library(emmeans)

# NF
fm2.nf <- lm(NF ~ Bloco + Hibrido, data = t4)
anova(fm2.nf)
(tk.nf <- emmeans(fm2.nf,"Hibrido"))
cld(tk.nf, adjust = "tukey", Letters=letters)

# AP
fm2.ap <- lm(AP ~ Bloco + Hibrido, data = t4)
anova(fm2.ap)
(tk.ap <- emmeans(fm2.ap,"Hibrido"))
cld(tk.ap, adjust = "tukey", Letters=letters)

p.nf = plot(tk.nf, xlab = "NF emmeans", ylab = "Hybrids")
p.ap = plot(tk.ap, xlab = "AP emmeans", ylab = "Hybrids")
grid.arrange(p.nf,p.ap, ncol=2)

## ---- Response to selection-----------------------------------------------------------------------------------

# NF
(Xo.NF <- mean(t4$NF))
(Xs.NF <- mean(sort(tapply(t4$NF, t4$Hibrido, mean), decreasing = T)[1:2]))
(DS.NF <- Xs.NF - Xo.NF)
(GS.NF <- round(DS.NF * h.nf, 2))
(GS.NF.per <- round(GS.NF/Xo.NF*100,2))
(Xm.NF <- Xo.NF + GS.NF)

# AP
(Xo.AP <- round(mean(t4$AP),2))
(Xs.AP <- round(mean(sort(tapply(t4$AP, t4$Hibrido, mean), decreasing = F)[1:2]),2))
(DS.AP <- Xs.AP - Xo.AP)
(GS.AP <- round(DS.AP * h.ap, 2))
(GS.AP.per <- round(GS.AP/Xo.AP*100,2))
(Xm.AP <- Xo.AP + GS.AP)

rs2 = data.frame("NF" = c(Xo.NF, Xs.NF, DS.NF, GS.NF, GS.NF.per, Xm.NF),
                "AP" = c(Xo.AP, Xs.AP, DS.AP, GS.AP, GS.AP.per, Xm.AP))
rownames(rs2) = c("Média total", "Média gen. selecionados", "Diferencial de Seleção",
                  "Ganho de seleção","Ganho de seleção %", "Média pop. melhorada" ); rs2

# Plot NF and AP
t.nf = ggplot(t4, aes(NF)) +
  geom_density(fill="white") +
  geom_vline(xintercept=mean(t4$NF), col="red") +
  annotate("text", x = 12.1, y = 0.4, label = "Xo", size = 5)
d1 <- ggplot_build(t.nf)$data[[1]]
(pp1 = t.nf + geom_area(data = subset(d1, x > 12.85), aes(x=x, y=y), fill="darkseagreen2") +
  geom_vline(xintercept=Xs.NF, col="darkgreen", linetype = "dashed") +
  annotate("text", x = 13, y = 0.4, label = "Xs", size = 5))

t.ap = ggplot(t4, aes(AP)) +
  geom_density(fill="white") +
  geom_vline(xintercept=mean(t4$AP), col="red") +
  annotate("text", x = 1.3, y = 2.3, label = "Xo", size = 5)
d2 <- ggplot_build(t.ap)$data[[1]]
(pp2 = t.ap + geom_area(data = subset(d2, x < 1.205), aes(x=x, y=y), fill="lightblue") +
  geom_vline(xintercept=Xs.AP, col="darkblue", linetype = "dashed") +
  annotate("text", x = 1.1, y = 2.3, label = "Xs", size = 5))

grid.arrange(pp1,pp2, ncol=2)

## ---- GxE Interaction-----------------------------------------------------------------------------------------
t2 <- read.table("T1.txt", h=T)
str(t2)
t2$Bloco <- as.factor(t2$Bloco)
str(t2)

gxe <- rbind(t2, t4)
gxe$Turma <- as.factor(c(rep(1,20), rep(2, 20)))
str(gxe)
head(gxe); tail(gxe)

#NF
tapply(gxe$NF, gxe[,c(1,5)], mean)
fm3.nf <- aov(NF ~ Turma + Hibrido + Turma*Hibrido + Turma:Bloco, data=gxe)
anova(fm3.nf)  

#AP
tapply(gxe$AP, gxe[,c(1,5)], mean)
fm3.ap <- aov(AP ~ Turma + Hibrido + Turma*Hibrido + Turma:Bloco, data=gxe)
anova(fm3.ap)  

# Prepare data for plotting GxE
t2b <- rbind(t2,t2)
t2b[,"Trait"] <- c(rep(paste("NF"),20),rep(paste("AP"),20))
t2b[,"Valor"] <- c(t2[1:20,"NF"],t2[1:20,"AP"])
t2b <- t2b[,-c(3,4)]

t24 <- rbind(t2b,t4b)
t24[,"Turma"] <- c(rep(paste("T2"),40),rep(paste("T4"),40))
t24[,"Comb"] <- paste(t24$Turma, t24$Bloco, sep = "-") 

# GxE considering Turma and blocks
ggplot(data=t24, aes(x=Comb, y=Valor, group = Hibrido)) +
  geom_line(aes(color = Hibrido)) + geom_point(aes(color = Hibrido)) +
  labs(x = "Turma-Bloco", y = "Value", color = "Hybrids") +
  facet_grid(Trait ~ ., scales="free") 

# GxE considering mean values for Turma
g <- aggregate(t24$Valor,list("Turma"=t24$Turma, "Trait"=t24$Trait, "Hibrido"=t24$Hibrido),mean)
ggplot(data=g, aes(x=Turma, y=x, group = Hibrido)) +
  geom_line(aes(color = Hibrido)) + geom_point(aes(color = Hibrido)) +
  labs(x = "Turma", y = "Value", color = "Hybrids") +
  facet_grid(Trait ~ ., scales="free") 

# new h2 considering GxE
b = list(anova(fm3.nf), anova(fm3.ap))
rs3.f = matrix(NA,4,2)
for (i in 1:2) {
  rs3.f[1,i] <- Vg <- round((b[[i]][[3]][2] - b[[i]][[3]][5])/(r*2),4)
  rs3.f[2,i] <- Ve <- round(b[[i]][[3]][5],4)
  rs3.f[3,i] <- Vga <- round((b[[i]][[3]][3] - b[[i]][[3]][5])/r ,4)
  rs3.f[4,i] <- new.h <- round(Vg / (Vg + Vga/2 + Ve/(r*2)),4)
  rownames(rs3.f) = c("Vg", "Ve", "Vga", "New.h2")
  colnames(rs3.f) = c("NF", "AP")
}
rs3.f

## ---- Correlation---------------------------------------------------------------------------------------------
t4$APNF <- as.numeric(t4$AP + t4$NF)
head(t4)

fm3 <- aov(APNF ~ Bloco + Hibrido, data = t4)
anova(fm3)

COVf <- (anova(fm3)[[3]][2]-anova(fm1.nf)[[3]][2]-anova(fm1.ap)[[3]][2])/2 # pheno cov.
COVe <- (anova(fm3)[[3]][3]-anova(fm1.nf)[[3]][3]-anova(fm1.ap)[[3]][3])/2 # env. cov
COVg <- (COVf - COVe)/r # gen. cov.

(corg <- round(COVg/sqrt(Vg.nf*Vg.ap),2)) # gen.corr

## ---- Indirect and direct gains-------------------------------------------------------------------------------

# estimate i tab
i <- function(persel){
  estimate<-dnorm(qnorm(1-(persel/100), 0, 1), 0, 1)/(persel/100)
     return(estimate)
  }

(isel <- i(40)) # 40%

(GSi.NF_AP <- round(isel*corg*sqrt(h.nf)*sqrt(Vg.ap),2)) # selecting NF for AP
(GSi.AP_NF <- round(isel*corg*sqrt(h.ap)*sqrt(Vg.nf),2)) # selecting AP for NF
(GSd.NF <- round(isel*1.00*sqrt(h.nf)*sqrt(Vg.nf),2))
(GSd.AP <- round(isel*1.00*sqrt(h.ap)*sqrt(Vg.ap),2))