################################ # GWAS ############################### # loading phenotypes Y <- readRDS("phenoGS") head(Y) dim(Y) # loading the marker file M <- readRDS("M") dim(M) # selecting just the hybrids match(Y$gid, rownames(M)) M <- M[match(Y$gid, rownames(M)),] dim(M) head(M[,1:6]) tail(M[,1:6]) all(Y$gid == rownames(M)) # loading the hapmap hapmap <- readRDS("hapmap") head(hapmap) colnames(hapmap) <- c("SNP", "Chrm", "Pos") dim(hapmap) #double check all(colnames(M) == hapmap$SNP) # add a new collum (gid) M2 <- cbind(as.numeric(rownames(M)), M) colnames(M2)[1] <- "Taxa" head(M2[,1:6]) dim(M2) # PCA based on Ga library(snpReady) Ga <- G.matrix(M, method = "VanRaden", format = "wide", plot = F)$Ga colnames(Ga) <- rownames(Ga) <- Y$gid dim(Ga) Ga[1:6,1:6] eig.result <- eigen(Ga) lambda <- eig.result$values length(lambda) plot(lambda/sum(lambda), ylab = "Fraction Explained", col = "red") PCA <- eig.result$vectors ################### FarmCPUpp ############## library(bigmemory) library(biganalytics) library(compiler) source("http://zzlab.net/GAPIT/gapit_functions.txt") source("http://zzlab.net/FarmCPU/FarmCPU_functions.txt") # preparing final files head(Y) myY <- Y[,c(1,2)] head(myY) myGD <- M2 myGM <- hapmap myCV <- PCA head(myCV[,1:5]) # GWAS models MPS.naive <- FarmCPU(Y = myY, GD = myGD, GM = myGM, cutOff = 0.05) MPS.PC1 <- FarmCPU(Y = myY, GD = myGD, GM = myGM, CV = myCV[,1], cutOff = 0.05) MPS.PC2 <- FarmCPU(Y = myY, GD = myGD, GM = myGM, CV = myCV[,1:2], cutOff = 0.05) # identifying the real p.threshold value for our data p.MPS <- FarmCPU.P.Threshold(Y = myY <- Y[,c(1,2)], GD = myGD, GM = myGM, trait = "BLUE", theRep = 30) p.MPS (p.Bonf <- 0.05/ncol(M)) # p by Bonferroni # the newest threshold (thr.MPS <- -log10(p.MPS)) -log10(p.Bonf) #threshold by Bonferroni # running again but using the newest threshold MPS <- FarmCPU(Y = myY, GD = myGD, GM = myGM, p.threshold = p.MPS, threshold.output = p.MPS, MAF.calculate = TRUE, cutOff = c((p.MPS*ncol(M)), 0.05)) # significance, MAF, and allele substitution effect head(MPS$GWAS) (qtl <- MPS$GWAS[MPS$GWAS$P.value < p.MPS,]) # identifying the marker # boxplots = phenotypes ~ number of copies SNP <- droplevels(qtl$SNP) par(mfrow = c(1,nrow(qtl))) for(i in 1: length(SNP)){ boxplot(Y$BLUE ~ M[,SNP[i]], main = SNP[i], xlab = "Number of copies", ylab = "MPS", col = "red") } # number of genotypes per class for(i in 1:length(SNP)) { print(SNP[i]) print(table(M[,SNP[i]])) } # heritability and R2 qtl$Va <-round(2 * qtl$maf * (1 - qtl$maf) * qtl$effect^2, 3) qtl$ha <- round(qtl$Va / var(Y$BLUE), 2) qtl$R2 <- round(qtl$ha^2, 2) library(knitr) kable(qtl) ###################