####################################### # adjusting phenotypes for two-step GS ####################################### # loading files pheno <- readRDS("pheno2") head(pheno) # selecting just the hybrids pheno <- droplevels.data.frame(pheno[pheno$type == "sc",]) pheno$GN <- as.factor(paste(pheno$gid, pheno$N, sep = "")) head(pheno) # adjusting the model library(breedR) # First way - genotypes as fixed solF <- remlf90(fixed = MPSadj ~ N + gid, random = ~ rep + GN, data = pheno, method = "em") (MPS.fixed <- solF$fixed$gid) # GE deviations head(solF$ranef$GN[[1]]) # Second way - deregressing phenotypes solR <- remlf90(fixed = MPSadj ~ N, random = ~ rep + gid + GN, method = "em", data = pheno) (MPS.random <- solR$ranef$gid) # realiability (rel <- 1 - (MPS.random[[1]][,2])^2/solR$var[2]) mean(rel) #accuracy (Ac.MPS <- sqrt(mean(rel))) # derregressing BLUPS dblup.MPS <- MPS.random[[1]][,1]/rel dblup.MPS # weights c <- 0.5 # this constant is amount of variation that we expect not be explained by markers (hg <- round(solR$var[2] / (solR$var[2] + solR$var[3]),2)) # heritability (w <- (1 - hg)/(c + (1 - rel) / rel*hg)) # combining the data phenoGS <- data.frame(gid = rownames(MPS.fixed[[1]]), BLUE = MPS.fixed[[1]][,1], dBLUP = dblup.MPS, w = w) head(phenoGS) #comparing the adjustemns par(mfrow = c(1,2)) hist(phenoGS$BLUE, col = "red", main = "BLUE", xlab = NULL) hist(phenoGS$dBLUP, col = "blue", main = "dBLUP", xlab = NULL) dev.off() cor(phenoGS$BLUE, phenoGS$dBLUP) plot(phenoGS$BLUE, phenoGS$dBLUP, xlab = "BLUES", ylab = "dBLUP" ) abline(lm(phenoGS$dBLUP ~ phenoGS$BLUE), col = "red") saveRDS(phenoGS, "phenoGS") ########################################### # OTS - reducing the number of individuals ########################################### # loading the marker file M <- readRDS("M") dim(M) M[1:20, 1:6] # selecting just the hybrids match(phenoGS$gid, rownames(M)) M <- M[match(phenoGS$gid, rownames(M)),] dim(M) head(M[,1:6]) tail(M[,1:6]) all(phenoGS$gid == rownames(M)) str(M) # Ga matrix library(snpReady) hapmap <- readRDS("hapmap") Ga <- G.matrix(M, method = "VanRaden", format = "wide", plot = F)$Ga dim(Ga) Ga[1:6, 1:6] # PCA and svd analysis svdGa <- svd(Ga, nu = ncol(Ga), nv = nrow(Ga)) plot(cumsum((svdGa$d[1:ncol(Ga)])^2/sum(svdGa$d^2)), col = "red") pcsGa <- Ga %*% svdGa$v rownames(pcsGa) <- rownames(Ga) sum(cumsum((svdGa$d[1:ncol(Ga)])^2/sum(svdGa$d^2)) < 0.90) # based on the Miztal results, 98% of Va in enough to represent the whole dataset n_of_pc <- function(K, proportion){ svdK <- svd(K, nu = ncol(K), nv = nrow(K)) cs <- sum(cumsum((svdK$d[1:ncol(K)])^2/sum(svdK$d^2)) < proportion) return(cs) } #however, lets try different proportions proportions <- c(.90, .95, .98) size <- c() for(i in 1:length(proportions)){ size <- c(size, n_of_pc(Ga, proportions[i])) } # diferent sizes of TS size # optimazing training sets - OTS #install.packages("STPGA") library(STPGA) OTS <- list() for(i in 1:length(size)) { OTS[[i]] <- GenAlgForSubsetSelectionNoTest(P = as.matrix(pcsGa), ntoselect = size[i], npop = ncol(Ga), nelite = size[i], mutprob = .8, niterations = 100, plotiters = TRUE, lambda = 1e-5, errorstat = "PEVMEAN", mc.cores = 4)[[1]] } names(OTS) <- paste("OTS", size, sep = "") OTS saveRDS(OTS, "OTS") ##############################