################## # BreedR for GS ################## # loading data Y <- readRDS("phenoGS") head(Y) str(Y) #identifying the five best hybrids (top5 <- droplevels(Y[order(Y[,"BLUE"], decreasing = TRUE)[1:5],"gid"])) # loading the marker file M <- readRDS("M") dim(M) # selecting just the hybrids match(Y$gid, rownames(M)) M <- M[match(Y$gid, rownames(M)),] dim(M) head(M[,1:6]) tail(M[,1:6]) all(Y$gid == rownames(M)) # building the Ga matrix library(snpReady) Ga <- G.matrix(M, method = "VanRaden", format = "wide", plot = F)$Ga colnames(Ga) <- rownames(Ga) <- Y$gid dim(Ga) Ga[1:6,1:6] # creating the Za for additive effects Za <- model.matrix(~ -1 + gid, data = Y) colnames(Za) <- colnames(Ga) dim(Za) Za[1:6, 1:6] #################### # GS using A model #################### #install.packages('devtools') #devtools::install_github('famuvie/breedR') library(breedR) nfold <- 5 replicates <- 5 n <- dim(Y)[1] set.seed(29121983) # to obtain the same sampling output <- data.frame() GEBV <- data.frame() for (r in 1:replicates) { cat("Processing the replicate", r, "\n") ## test sets sets <- sample(rep(1:nfold, length.out = n)) # different orders of the same vector for (k in 1:nfold) { cat("Processing the fold", k, "\n") itest = which(sets == k) itrain = which(sets != k) YGS <- Y YGS[itest, "BLUE"] <- NA sol <- remlf90(fixed = BLUE ~ 1, generic = list(A = list(Za, Ga)), data = YGS, method = "em") ## predicted values for the test set blups <- sol$ranef[[1]][[1]] rownames(blups) <- colnames(Ga) top5pred <- rownames(blups[order(blups[,1], decreasing = TRUE)[1:5],1:2]) predicted.blups <- as.matrix(blups[itest,]) observed.blups <- Y[itest, "BLUE"] output <- rbind(output, data.frame( PA = cor(predicted.blups[,1], observed.blups), CS = sum(top5pred %in% top5)/length(top5), hm = sol$var[1,1]/sum(sol$var[,1]), LL = logLik(sol)[1], AIC = (-2*logLik(sol) + 2*dim(sol$var)[1])[1]) ) GEBV = rbind(GEBV, data.frame( gid = rownames(blups), gebv = blups[,1])) } } output (result <- apply(output, 2, mean)) dim(GEBV) head(GEBV) GEBV2 <- as.matrix(tapply(GEBV$gebv, GEBV$gid, mean)) head(GEBV2) GEBV2 <- GEBV2[(match(rownames(M), rownames(GEBV2))),] GEBV3 <- as.matrix(GEBV2) colnames(GEBV3) <- "GEBV" head(GEBV3) ################### ## markers effect and RR-BLUP ################### # fuction to estimate the variance vpq <- function(x){ p <- sum(x)/(length(x)*2) pq <- p * (1 - p) return(pq) } vg <- 2*sum(apply(M, 2, vpq)) # and finally the marker effects a <- t(M) %*% solve(Ga) %*% GEBV3 / vg all(rownames(a) == colnames(M)) colnames(a) <- "marker effect" head(a) dim(a) # correlation betwwen GBLUP and RR-BLUP rrblup.gbv <- M %*% a colnames(rrblup.gbv) <- "RR-BLUP" head(rrblup.gbv) (pho <- round(cor(GEBV3, rrblup.gbv),2)) joint <- data.frame(gid = Y$gid, GBLUP = GEBV3, RRBLUP = rrblup.gbv) head(joint) library(plotly) (p <- plot_ly(joint, x = ~GEBV, y = ~RR.BLUP, type = 'scatter', mode = 'markers', text = ~paste('gid: ', gid))) ############### # GS using OTS ############### #install.packages('devtools') #devtools::install_github('famuvie/breedR') library(breedR) OTS <- readRDS("OTS") names(OTS) output2 <- data.frame() for (i in 1:length(OTS)){ cat("Processing OTS", i, "\n") itrain <- OTS[[i]] itest <- Y$gid[!(Y$gid) %in% itrain] YGS <- Y YGS[Y$gid %in% itest, "BLUE"] <- NA sol <- remlf90(fixed = BLUE ~ 1, generic = list(A = list(Za, Ga)), data = YGS, method = "em") ## predicted values for the test set blups <- sol$ranef[[1]][[1]] rownames(blups) <- colnames(Ga) blups$gid <- rownames(blups) top5pred <- rownames(blups[order(blups[,1], decreasing = TRUE)[1:5],1:2]) predicted.blups <- blups[blups$gid %in% itest,] observed.blups <- Y[itest, "BLUE"] output2 <- rbind(output2, data.frame( OTS = names(OTS)[i], PA = cor(predicted.blups[,1], observed.blups), CS = sum(top5pred %in% top5)/length(top5), hm = sol$var[1,1]/sum(sol$var[,1]), LL = logLik(sol)[1], AIC = (-2*logLik(sol) + 2*dim(sol$var)[1])[1]) ) } library(knitr) kable(output2) library(plotly) (p2 <- plot_ly(output2, x = ~OTS, y = ~ PA, type = "bar")) ########################################