################## # BreedR for GS ################## # loading data pheno <- readRDS("pheno") head(pheno) tail(pheno) pheno <- droplevels.data.frame(pheno[pheno$type == "sc",]) # correlation among traits round(cor(pheno[,7:9]),2) library("PerformanceAnalytics") chart.Correlation(as.matrix(pheno[,7:9]), histogram = TRUE, pch = 19) # selecting just SRA and NAE, and adjusting for normality library(GenABEL) pheno$MPSadj <- as.numeric(rntransform(pheno$MPS)) shapiro.test(pheno$MPSadj) pheno$NAEadj <- as.numeric(rntransform(pheno$NAE)) shapiro.test(pheno$NAEadj) head(pheno) # estimating the BLUES library(breedR) MPS <- remlf90(fixed = MPSadj ~ N + gid, random = ~ rep, method = "em", data = pheno) NAE <- remlf90(fixed = NAEadj ~ N + gid, random = ~ rep, method = "em", data = pheno) Y <- data.frame(gid = rownames(MPS$fixed$gid[[1]]), MPS = MPS$fixed$gid[[1]], NAE = NAE$fixed$gid[[1]])[,c(1,2,4)] colnames(Y)[2:3] <- c("MPS", "NAE") head(Y) head(Y) str(Y) #identifying the five best hybrids for each trait (top5A <- droplevels(Y[order(Y[,2], decreasing = TRUE)[1:5],"gid"])) (top5B <- droplevels(Y[order(Y[,3], decreasing = TRUE)[1:5],"gid"])) # loading the marker file M <- readRDS("M") dim(M) # selecting just the hybrids match(Y$gid, rownames(M)) M <- M[match(Y$gid, rownames(M)),] dim(M) head(M[,1:6]) tail(M[,1:6]) all(Y$gid == rownames(M)) # building the Ga matrix library(snpReady) Ga <- G.matrix(M, method = "VanRaden", format = "wide", plot = F)$Ga colnames(Ga) <- rownames(Ga) <- Y$gid dim(Ga) Ga[1:6,1:6] # creating the Za for additive effects Za <- model.matrix(~ -1 + gid, data = Y) colnames(Za) <- colnames(Ga) dim(Za) Za[1:6, 1:6] # covariance matrices covg <- matrix(c(1, cov(Y[,2], Y[,3]),cov(Y[,2], Y[,3]),1), 2, 2) colnames(covg) <- rownames(covg) <- colnames(Y)[2:3] covg covr <- matrix(c(1, 0, 0, 1), 2, 2) colnames(covr) <- rownames(covr) <- colnames(Y)[2:3] covr #################### # GS using A model #################### #install.packages('devtools') #devtools::install_github('famuvie/breedR') library(breedR) nfold <- 5 replicates <- 5 n <- dim(Y)[1] set.seed(29121983) # to obtain the same sampling output <- data.frame() GEBV <- data.frame() for (r in 1:replicates) { cat("Processing the replicate", r, "\n") ## test sets sets <- sample(rep(1:nfold, length.out = n)) # different orders of the same vector for (k in 1:nfold) { cat("Processing the fold", k, "\n") itest = which(sets == k) itrain = which(sets != k) YGS <- Y YGS[itest, 2] <- NA YGS[itest, 3] <- NA sol <- remlf90(fixed = cbind(Y[,2], Y[,3]) ~ 1, generic = list(A = list(Za, Ga, var.ini = covg)), data = YGS, method = "em", var.ini = list(covr)) ## predicted values for the test set blupsA <- as.matrix(sol$ranef[[1]][[1]]) rownames(blupsA) <- colnames(Ga) top5predA <- rownames(blupsA[order(blupsA[,1], decreasing = TRUE)[1:5],1:2]) predicted.blupsA <- as.matrix(blupsA[itest,]) observed.blupsA <- Y[itest, 2] blupsB <- as.matrix(sol$ranef[[1]][[2]]) rownames(blupsB) <- colnames(Ga) top5predB <- rownames(blupsB[order(blupsB[,1], decreasing = TRUE)[1:5],1:2]) predicted.blupsB <- as.matrix(blupsB[itest,]) observed.blupsB <- Y[itest, 3] output <- rbind(output, data.frame( PA.A = cor(predicted.blupsA[,1], observed.blupsA), CS.A = sum(top5predA %in% top5A)/length(top5A), hm.A = sol$var[[1]][1,1]/sum(sol$var[[1]][1,1], sol$var[[2]][1,1]), PA.B = cor(predicted.blupsB[,1], observed.blupsB), CS.B = sum(top5predB %in% top5B)/length(top5B), hm.B = sol$var[[1]][2,2]/sum(sol$var[[1]][2,2], sol$var[[2]][2,2]), LL = logLik(sol)[1] ) ) GEBV = rbind(GEBV, data.frame( gid = colnames(Ga), gebvA = blupsA[,1], gebvB = blupsB[,1])) } } output (result <- round(apply(output, 2, mean), 2)) # Estimating the average of the GEBV dim(GEBV) head(GEBV) GEBV2 <- cbind(tapply(GEBV$gebvA, GEBV$gid, mean), tapply(GEBV$gebvB, GEBV$gid, mean)) head(GEBV2) GEBV2 <- GEBV2[(match(rownames(M), rownames(GEBV2))),] GEBV3 <- as.matrix(GEBV2) colnames(GEBV3) <- colnames(Y)[2:3] head(GEBV3) # the new correlation between the traits cor(GEBV3) chart.Correlation(as.matrix(GEBV3), histogram = TRUE, pch = 19) ########################################