if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager") BiocManager::install("tximport", version = "3.8") BiocManager::install("DESeq2", version = "3.8") #BiocManager::install("impute", version = "3.8") #BiocManager::install("preprocessCore", version = "3.8") #BiocManager::install("GO.db", version = "3.8") install.packages("jsonlite") #install.packages("WGCNA") #install.packages("mclust") install.packages("reshape2") library(DESeq2) library(tximport) library(jsonlite) library(pheatmap) #library(WGCNA) library(mclust) library(reshape2) library(ggplot2) rm(list=ls()) setwd("D:/Salmon") targets<-read.csv("target.csv",header=TRUE) rownames(targets)<-targets$SampleName files <- paste("D:/Salmon/", targets$SampleName, "_quant/quant.sf",sep="") tx2gene<-read.delim("transcript2gene.txt",header=FALSE) names(files) <- targets$SampleName all(file.exists(files)) txi.salmon <- tximport(files, type = "salmon", tx2gene = tx2gene, txOut=FALSE) #txi.salmon <- tximport(files, type = "salmon", txOut= TRUE, countsFromAbundance = 'lengthScaledTPM') head(txi.salmon$abundance) Arabtpm<-txi.salmon$abundance dds <- DESeqDataSetFromTximport(txi.salmon, colData = targets, design = ~Condition) dds$Condition <-relevel(dds$Condition, ref='MOCK') head(dds) dds <- DESeq(dds) vsd <- vst(dds, blind=FALSE) #Check distribution of samples in a PCA, showing the top 500 varying genes plotPCA(vsd, intgroup=c("Genotype"),ntop=500) plotPCA(vsd, intgroup=c("Condition"),ntop=500) plotPCA(vsd, intgroup=c("Condition","Genotype"),ntop=500) #Show clustering of samples, should be in agreement with PCA sampleDists <- dist(t(assay(vsd))) sampleDistMatrix <- as.matrix(sampleDists) rownames(sampleDistMatrix) <- paste(vsd$Condition, vsd$Genotype, sep="-") colnames(sampleDistMatrix) <- paste(vsd$Condition, vsd$Genotype, sep="-") pheatmap(sampleDistMatrix, clustering_distance_rows=sampleDists, clustering_distance_cols=sampleDists) #Getting differentially expressed genes, comparisons against control (MOCK) res_SALT_vs_MOCK <- results(dds, lfcThreshold=1, altHypothesis="greaterAbs", contrast = c('Condition','SALT','MOCK'), alpha=0.05) res_ABA_vs_MOCK <- results(dds, lfcThreshold=1, altHypothesis="greaterAbs", contrast = c('Condition','ABA','MOCK'), alpha=0.05) res_DEHYD_vs_MOCK <- results(dds, lfcThreshold=1, altHypothesis="greaterAbs", contrast = c('Condition','DEHYD','MOCK'), alpha=0.05) summary(res_SALT_vs_MOCK) summary(res_ABA_vs_MOCK) summary(res_DEHYD_vs_MOCK) sig_SALT_vs_MOCK<-res_SALT_vs_MOCK[which(res_SALT_vs_MOCK$padj<0.05),] dim(sig_SALT_vs_MOCK) head(sig_SALT_vs_MOCK) sig_ABA_vs_MOCK<-res_ABA_vs_MOCK[which(res_ABA_vs_MOCK$padj<0.05),] dim(sig_ABA_vs_MOCK) head(sig_ABA_vs_MOCK) sig_DEHYD_vs_MOCK<-res_DEHYD_vs_MOCK[which(res_DEHYD_vs_MOCK$padj<0.05),] dim(sig_DEHYD_vs_MOCK) head(sig_DEHYD_vs_MOCK) #Exploring DGE results dev.off() drawLines <- function() abline(h=c(-1,1),col="dodgerblue",lwd=2) plotMA(res_ABA_vs_MOCK); drawLines() plotMA(res_SALT_vs_MOCK); drawLines() plotMA(res_DEHYD_vs_MOCK); drawLines() TPM_SALT_vs_MOCK<-Arabtpm[which(rownames(Arabtpm) %in% rownames(sig_SALT_vs_MOCK)), as.character(targets[which(targets$Condition %in% c('SALT','MOCK')), 'SampleName'])] head(Arabtpm) dim(Arabtpm) dim(TPM_SALT_vs_MOCK) head(TPM_SALT_vs_MOCK) TPM_ABA_vs_MOCK<-Arabtpm[which(rownames(Arabtpm) %in% rownames(sig_ABA_vs_MOCK)), as.character(targets[which(targets$Condition %in% c('ABA','MOCK')),'SampleName'])] TPM_DEHYD_vs_MOCK<-Arabtpm[which(rownames(Arabtpm) %in% rownames(sig_DEHYD_vs_MOCK)), as.character(targets[which(targets$Condition %in% c('DEHYD','MOCK')),'SampleName'])] dev.off annot_col= data.frame(Genotype=factor(targets$Genotype), Condition=factor(targets$Condition)) rownames(annot_col)<-targets$SampleName pheatmap(TPM_SALT_vs_MOCK, scale='row', annotation_col = annot_col) pheatmap(TPM_ABA_vs_MOCK, scale='row', annotation_col = annot_col) pheatmap(TPM_DEHYD_vs_MOCK, scale='row', annotation_col = annot_col) ############Clustering and expression profile plotting targets ArabTPMMmean<-data.frame(SALT=apply(Arabtpm[,which(colnames(Arabtpm) %in% rownames(targets[which(targets$Condition == 'SALT'), ]))],1,mean), ABA=apply(Arabtpm[,which(colnames(Arabtpm) %in% rownames(targets[which(targets$Condition == 'ABA'), ]))],1,mean), DEHYD=apply(Arabtpm[,which(colnames(Arabtpm) %in% rownames(targets[which(targets$Condition == 'DEHYD'), ]))],1,mean), MOCK=apply(Arabtpm[,which(colnames(Arabtpm) %in% rownames(targets[which(targets$Condition == 'MOCK'), ]))],1,mean)) head(ArabTPMMmean) Zval<-function(x){ xmean<-mean(x) xsd<-sd(x) z<-(x-xmean)/xsd return(z) } ArabZval<-t(apply(ArabTPMMmean, 1, Zval)) head(ArabZval) head(ArabTPMMmean) dim(ArabZval) ArabClusters<-read.delim("GeneExpressionClusters.txt") head(ArabClusters) table(ArabClusters$Cluster) Cluster11<-ArabZval[which(rownames(ArabZval) %in% ArabClusters[which(ArabClusters$Cluster == 11),"Gene"]),] head(Cluster11) Cluster11<-as.data.frame(Cluster11) Cluster11$Gene<-rownames(Cluster11) head(Cluster11) Cluster11_melt<-melt(Cluster11,id.vars = 'Gene') head(Cluster11_melt) ggplot(Cluster11_melt, aes(y=value,x=variable,group=Gene))+ geom_line(colour='black')+ theme_bw()+ ylab('Relative expression value')+ xlab('Condition')