##################### # GS using GE models ##################### # Loading the phenotypes pheno <- readRDS("pheno2") head(pheno) dim(pheno) # removing the parents pheno <- droplevels.data.frame(pheno[pheno$type == "sc",]) head(pheno) tail(pheno) dim(pheno) # adjusting models per enviroment library(breedR) LN <- remlf90(fixed = MPSadj2 ~ gid, random = ~rep, method = "em", data = pheno[pheno$N == "low",]) IN <- remlf90(fixed = MPSadj2 ~ gid, random = ~rep, method = "em", data = pheno[pheno$N == "ideal",]) # joint the data by env phenoGE <- data.frame(env = c(rep("LN", dim(LN$fixed[[1]][[1]])[1]), rep("IN", dim(IN$fixed[[1]][[1]])[1])), gid = c(rownames(LN$fixed[[1]][[1]]), rownames(IN$fixed[[1]][[1]])), MPS = c(LN$fixed[[1]][[1]][,1], IN$fixed[[1]][[1]][,1])) head(phenoGE) tail(phenoGE) str(phenoGE) # loading the marker file M <- readRDS("M") dim(M) # selecting just the hybrids match(levels(pheno$gid), rownames(M)) M <- M[match(levels(pheno$gid), rownames(M)),] dim(M) head(M[,1:6]) tail(M[,1:6]) all(phenoGE$gid %in% rownames(M)) all(rownames(M) %in% phenoGE$gid) ####################################### #===== Multi-Environment models ======# ####################################### # Packages installation # install.packages("devtools") # devtools::install_github('italo-granato/BGGE') # install.packages("MCMCpack") # devtools::install_github('QuantGen/MTM') library(BGGE) args(getK) # getK - function to create kernels for different genomic GE models ################################## # Main genotypic effect model (MM) ################################## #It assumes that genetic effects across environments are constant K.MMGB <- getK(Y = phenoGE, X = M, kernel = "GB", model = "MM") str(K.MMGB) # Prediction # BGGE - function that makes predictions args(BGGE) (ne <- as.vector(table(phenoGE$env))) # number of genotypes by environmens # running using the whole data fitMMGB <- BGGE(y = phenoGE$MPS, K = K.MMGB, ne = ne, ite = 500, burn = 200, thin = 5) names(fitMMGB) # Cross validation 1 (CV1) - Some genotypes were not observed in any environment # Simulating missing values IDs <- levels(phenoGE$gid) #unique genotypes IDy <- as.vector(phenoGE$gid) #vector of genotypes nFolds <- 5 # n of groups partitions <- 1 env <- as.vector(unique(phenoGE$env)) mfold <- matrix(NA, ncol = partitions, nrow = length(IDs)) sets1 <- matrix(NA, nrow = nrow(phenoGE), ncol = partitions) set.seed(29121983) for (s in 1:partitions) { mfold[, s] <- sample(1:nFolds, size = length(IDs), replace = TRUE) for (i in 1:nrow(phenoGE)) { sets1[i, s] <- mfold[which(IDs == phenoGE$gid[i]), s] } } # Cheking missing for genotypes yNA <- phenoGE yNA$MPS[sets1[,1] == 1] <- NA library(reshape2) tmp <- acast(data = yNA, gid ~ env, value.var = "MPS") #transforming to env in columns head(tmp, 10) # showing the 10 first rggMM1 <- matrix(0, length(env), nFolds) # Matrix of results NULL for(i in 1:nFolds){ yNA <- phenoGE yNA$MPS[sets1[,1] == i] <- NA fitMMGB1 <- BGGE(y = yNA$MPS, K = K.MMGB, ne = ne, ite = 500, burn = 200, thin = 5, verbose = FALSE) fm <- fitMMGB1$yHat for(g in 1:length(env)){ tst1 <- sets1[,1] == i & phenoGE$env == env[g] rggMM1[g, i] <- cor(phenoGE$MPS[tst1], fm[tst1]) } } rggMM1 rowMeans(rggMM1) # Cross validation 2 (CV2) - Genotypes were tested in some environments but not in other # Simulating missing values sets2 <- matrix(NA, nrow = nrow(phenoGE), ncol = partitions) set.seed(29121983) for (s in 1:partitions) { for (i in IDs) { tmp = which(IDy == i) ni = length(tmp) tmpFold <- sample(1:nFolds, size = ni, replace = ni * nFolds) sets2[tmp, s] <- tmpFold } } #Checking missing values yNA <- phenoGE yNA$MPS[sets2[,1] == 1] <- NA tmp <- acast(data = yNA, gid ~ env, value.var = "MPS") head(tmp, 10) # Prediction rggMM2 <- matrix(0, length(env), nFolds) for(i in 1:nFolds){ yNA <- phenoGE yNA$MPS[sets2[,1] == i] <- NA fitMMGB2 <- BGGE(y = yNA$MPS, K = K.MMGB, ne = ne, ite = 500, burn = 200, thin = 5) fm <- fitMMGB2$yHat for(g in 1:length(env)){ tst1 <- sets2[,1] == i & phenoGE$env == env[g] rggMM2[g, i] <- cor(phenoGE$MPS[tst1], fm[tst1]) } } rggMM2 rowMeans(rggMM2) ############################################################## # Single variance genotype × environment deviation model (MDs) ############################################################## # Incorporates a random deviation effect of GE interaction K.MDsGB <- getK(Y = phenoGE, X = M, kernel = "GB", model = "MDs") str(K.MDsGB) (ne <- as.vector(table(phenoGE$env))) # number of genotypes by environmens # Cross validation 1 (CV1) - Some genotypes were not observed in any environment # Simulating missing values IDs <- levels(phenoGE$gid) #unique genotypes IDy <- as.vector(phenoGE$gid) #vector of genotypes nFolds <- 5 # n of groups partitions <- 1 env <- as.vector(unique(phenoGE$env)) mfold <- matrix(NA, ncol = partitions, nrow = length(IDs)) sets1 <- matrix(NA, nrow = nrow(phenoGE), ncol = partitions) set.seed(29121983) for (s in 1:partitions) { mfold[, s] <- sample(1:nFolds, size = length(IDs), replace = TRUE) for (i in 1:nrow(phenoGE)) { sets1[i, s] <- mfold[which(IDs == phenoGE$gid[i]), s] } } # Cheking missing for genotypes yNA <- phenoGE yNA$MPS[sets1[,1] == 1] <- NA library(reshape2) tmp <- acast(data = yNA, gid ~ env, value.var = "MPS") #transforming to env in columns head(tmp, 10) # showing the 10 first rggMDs1 <- matrix(0, length(env), nFolds) # Matrix of results NULL for(i in 1:nFolds){ yNA <- phenoGE yNA$MPS[sets1[,1] == i] <- NA fitMDsGB1 <- BGGE(y = yNA$MPS, K = K.MDsGB, ne = ne, ite = 500, burn = 200, thin = 5, verbose = FALSE) fm <- fitMDsGB1$yHat for(g in 1:length(env)){ tst1 <- sets1[,1] == i & phenoGE$env == env[g] rggMDs1[g, i] <- cor(phenoGE$MPS[tst1], fm[tst1]) } } rggMDs1 rowMeans(rggMDs1) # Cross validation 2 (CV2) - Genotypes were tested in some environments but not in other # Simulating missing values sets2 <- matrix(NA, nrow = nrow(phenoGE), ncol = partitions) set.seed(29121983) for (s in 1:partitions) { for (i in IDs) { tmp = which(IDy == i) ni = length(tmp) tmpFold <- sample(1:nFolds, size = ni, replace = ni * nFolds) sets2[tmp, s] <- tmpFold } } #Checking missing values yNA <- phenoGE yNA$MPS[sets2[,1] == 1] <- NA tmp <- acast(data = yNA, gid ~ env, value.var = "MPS") head(tmp, 10) # Prediction rggMDs2 <- matrix(0, length(env), nFolds) for(i in 1:nFolds){ yNA <- phenoGE yNA$MPS[sets2[,1] == i] <- NA fitMDsGB2 <- BGGE(y = yNA$MPS, K = K.MDsGB, ne = ne, ite = 500, burn = 200, thin = 5) fm <- fitMDsGB2$yHat for(g in 1:length(env)){ tst1 <- sets2[,1] == i & phenoGE$env == env[g] rggMDs2[g, i] <- cor(phenoGE$MPS[tst1], fm[tst1]) } } rggMDs2 rowMeans(rggMDs2) ####################################################### # Environment-specific variance deviation model (MDe) ####################################################### #Genetic effect splitted into two components: #the common effect for all environments (across environment effects) and a random deviation effect for each environment (environment-specific effects) K.MDeGB <- getK(Y = phenoGE, X = M, kernel = "GB", model = "MDe") str(K.MDeGB) (ne <- as.vector(table(phenoGE$env))) # number of genotypes by environmens # Simulating missing values IDs <- levels(phenoGE$gid) #unique genotypes IDy <- as.vector(phenoGE$gid) #vector of genotypes nFolds <- 5 # n of groups partitions <- 1 env <- as.vector(unique(phenoGE$env)) mfold <- matrix(NA, ncol = partitions, nrow = length(IDs)) sets1 <- matrix(NA, nrow = nrow(phenoGE), ncol = partitions) set.seed(29121983) for (s in 1:partitions) { mfold[, s] <- sample(1:nFolds, size = length(IDs), replace = TRUE) for (i in 1:nrow(phenoGE)) { sets1[i, s] <- mfold[which(IDs == phenoGE$gid[i]), s] } } # Cheking missing for genotypes yNA <- phenoGE yNA$MPS[sets1[,1] == 1] <- NA library(reshape2) tmp <- acast(data = yNA, gid ~ env, value.var = "MPS") #transforming to env in columns head(tmp, 10) # showing the 10 first rggMDe1 <- matrix(0, length(env), nFolds) # Matrix of results NULL for(i in 1:nFolds){ yNA <- phenoGE yNA$MPS[sets1[,1] == i] <- NA fitMDeGB1 <- BGGE(y = yNA$MPS, K = K.MDeGB, ne = ne, ite = 500, burn = 200, thin = 5, verbose = FALSE) fm <- fitMDeGB1$yHat for(g in 1:length(env)){ tst1 <- sets1[,1] == i & phenoGE$env == env[g] rggMDe1[g, i] <- cor(phenoGE$MPS[tst1], fm[tst1]) } } rggMDe1 rowMeans(rggMDe1) # Cross validation 2 (CV2) - Genotypes were tested in some environments but not in other # Simulating missing values sets2 <- matrix(NA, nrow = nrow(phenoGE), ncol = partitions) set.seed(29121983) for (s in 1:partitions) { for (i in IDs) { tmp = which(IDy == i) ni = length(tmp) tmpFold <- sample(1:nFolds, size = ni, replace = ni * nFolds) sets2[tmp, s] <- tmpFold } } #Checking missing values yNA <- phenoGE yNA$MPS[sets2[,1] == 1] <- NA tmp <- acast(data = yNA, gid ~ env, value.var = "MPS") head(tmp, 10) # Prediction rggMDe2 <- matrix(0, length(env), nFolds) for(i in 1:nFolds){ yNA <- phenoGE yNA$MPS[sets2[,1] == i] <- NA fitMDeGB2 <- BGGE(y = yNA$MPS, K = K.MDeGB, ne = ne, ite = 500, burn = 200, thin = 5) fm <- fitMDeGB2$yHat for(g in 1:length(env)){ tst1 <- sets2[,1] == i & phenoGE$env == env[g] rggMDe2[g, i] <- cor(phenoGE$MPS[tst1], fm[tst1]) } } rggMDe2 rowMeans(rggMDe2) ############################################## # comparing the GE, environments, and cross-validation models ############################################### output <- data.frame(model = rep(c("MM", "MDs", "MDe"), each = 2), env = rep(env, 3), CV = rep(c("CV1", "CV2"),3), Ac = c(rowMeans(rggMM1), rowMeans(rggMM2), rowMeans(rggMDs1), rowMeans(rggMDs2), rowMeans(rggMDe1), rowMeans(rggMDe2))) par(mfrow = c(1,3)) barplot(tapply(output$Ac, output$CV, mean), col = c("blue", "lightblue"), cex.names = 0.75, main = "Cross-validation", ylim = c(0, 0.6)) barplot(tapply(output$Ac, output$model, mean), col = c("darkred", "red", "orange"), cex.names = 0.75, main = "GE model", ylim = c(0, 0.6)) barplot(tapply(output$Ac, output$env, mean), col = c("darkgreen", "lightgreen"), cex.names = 0.75, main = "N-level", ylim = c(0, 0.6)) ##############################################