################################
# GWAS 
###############################
# loading phenotypes
Y <- readRDS("phenoGS")
head(Y)
dim(Y)

# loading the marker file
M <- readRDS("M")
dim(M)
# selecting just the hybrids
match(Y$gid, rownames(M))
M <- M[match(Y$gid, rownames(M)),]
dim(M)
head(M[,1:6])
tail(M[,1:6])
all(Y$gid == rownames(M))

# loading the hapmap
hapmap <- readRDS("hapmap")
head(hapmap)
colnames(hapmap) <- c("SNP", "Chrm", "Pos")
dim(hapmap)

#double check
all(colnames(M) == hapmap$SNP)

# add a new collum (gid)
M2 <- cbind(as.numeric(rownames(M)), M)
colnames(M2)[1] <- "Taxa"
head(M2[,1:6])
dim(M2)

# PCA based on Ga
library(snpReady)
Ga <- G.matrix(M, method = "VanRaden", format = "wide", plot = F)$Ga
colnames(Ga) <- rownames(Ga) <- Y$gid
dim(Ga)
Ga[1:6,1:6]

eig.result <- eigen(Ga)
lambda <- eig.result$values
length(lambda)
plot(lambda/sum(lambda), ylab = "Fraction Explained", col = "red")
PCA <- eig.result$vectors


################### FarmCPUpp ##############
library(bigmemory)
library(biganalytics)
library(compiler)
source("http://zzlab.net/GAPIT/gapit_functions.txt")
source("http://zzlab.net/FarmCPU/FarmCPU_functions.txt")

# preparing final files
head(Y)
myY <- Y[,c(1,2)]
head(myY)
myGD <- M2
myGM <- hapmap
myCV <- PCA
head(myCV[,1:5])

# GWAS models
MPS.naive <- FarmCPU(Y = myY, GD = myGD, GM = myGM, cutOff = 0.05)
MPS.PC1 <- FarmCPU(Y = myY, GD = myGD, GM = myGM, CV = myCV[,1], cutOff = 0.05)
MPS.PC2 <- FarmCPU(Y = myY, GD = myGD, GM = myGM, CV = myCV[,1:2], cutOff = 0.05)


# identifying the real p.threshold value for our data
p.MPS <- FarmCPU.P.Threshold(Y = myY <- Y[,c(1,2)], GD = myGD, GM = myGM, trait = "BLUE", theRep = 30)
p.MPS
(p.Bonf <- 0.05/ncol(M)) # p by Bonferroni


# the newest threshold
(thr.MPS <- -log10(p.MPS))
-log10(p.Bonf) #threshold by Bonferroni


# running again but using the newest threshold
MPS <- FarmCPU(Y = myY, GD = myGD, GM = myGM, p.threshold = p.MPS, threshold.output = p.MPS, MAF.calculate = TRUE, cutOff = c((p.MPS*ncol(M)), 0.05))

# significance, MAF, and allele substitution effect
head(MPS$GWAS)
(qtl <- MPS$GWAS[MPS$GWAS$P.value < p.MPS,]) # identifying the marker


# boxplots = phenotypes ~ number of copies
SNP <- droplevels(qtl$SNP)
par(mfrow = c(1,nrow(qtl)))

for(i in 1: length(SNP)){
  boxplot(Y$BLUE ~ M[,SNP[i]], main = SNP[i], xlab = "Number of copies", ylab = "MPS", col = "red")
  }


# number of genotypes per class
for(i in 1:length(SNP)) {
  print(SNP[i])
  print(table(M[,SNP[i]]))
}

# heritability and R2
qtl$Va <-round(2 * qtl$maf * (1 - qtl$maf) * qtl$effect^2, 3)
qtl$ha <- round(qtl$Va / var(Y$BLUE), 2)
qtl$R2 <- round(qtl$ha^2, 2)
library(knitr)
kable(qtl)

###################