if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager") BiocManager::install("tximport") BiocManager::install("DESeq2") install.packages("VennDiagram") install.packages("mclust") library(tximport) library(VennDiagram) library(DESeq2) library(reshape2) library(pheatmap) library(ggplot2) library(mclust) rm(list=ls()) setwd("~/RNASeqPracticalTranscriptLevels/") targets<-read.csv("RNASeqTranscriptLevelsCleanDATA/target.csv",header=TRUE) targets rownames(targets)<-targets$SampleName targets files <- paste("~/RNASeqPracticalTranscriptLevels/Salmon/", targets$SampleName, "_salmon/quant.sf",sep="") files tx2gene<-read.delim("ArabidopsiscDNAINDEX/tx2gene.txt",header=FALSE) tx2gene names(files) <- targets$SampleName files all(file.exists(files)) txi.salmon <- tximport(files, type = "salmon", tx2gene = tx2gene, txOut=FALSE) head(txi.salmon$abundance) Arabtpm<-txi.salmon$abundance dds <- DESeqDataSetFromTximport(txi.salmon, colData = targets, design = ~Condition) dds$Condition <-relevel(dds$Condition, ref='MOCK') head(dds) dds <- DESeq(dds) vsd <- vst(dds, blind=FALSE) #Check distribution of samples in a PCA, showing the top 500 varying genes plotPCA(vsd, intgroup=c("Genotype"),ntop=500) plotPCA(vsd, intgroup=c("Condition"),ntop=500) plotPCA(vsd, intgroup=c("Condition","Genotype"),ntop=500) #Show clustering of samples, should be in agreement with PCA sampleDists <- dist(t(assay(vsd))) sampleDistMatrix <- as.matrix(sampleDists) rownames(sampleDistMatrix) <- paste(vsd$Condition, vsd$Genotype, sep="-") colnames(sampleDistMatrix) <- paste(vsd$Condition, vsd$Genotype, sep="-") pheatmap(sampleDistMatrix, clustering_distance_rows=sampleDists, clustering_distance_cols=sampleDists) #Getting differentially expressed genes, comparisons against control (MOCK) res_SALT_vs_MOCK <- results(dds, lfcThreshold=1, altHypothesis="greaterAbs", contrast = c('Condition','SALT','MOCK'), alpha=0.05) res_ABA_vs_MOCK <- results(dds, lfcThreshold=1, altHypothesis="greaterAbs", contrast = c('Condition','ABA','MOCK'), alpha=0.05) res_DEHYD_vs_MOCK <- results(dds, lfcThreshold=1, altHypothesis="greaterAbs", contrast = c('Condition','DEHYD','MOCK'), alpha=0.05) summary(res_SALT_vs_MOCK) summary(res_ABA_vs_MOCK) summary(res_DEHYD_vs_MOCK) sig_SALT_vs_MOCK<-res_SALT_vs_MOCK[which(res_SALT_vs_MOCK$padj<0.05),] dim(sig_SALT_vs_MOCK) head(sig_SALT_vs_MOCK) sig_ABA_vs_MOCK<-res_ABA_vs_MOCK[which(res_ABA_vs_MOCK$padj<0.05),] dim(sig_ABA_vs_MOCK) head(sig_ABA_vs_MOCK) sig_DEHYD_vs_MOCK<-res_DEHYD_vs_MOCK[which(res_DEHYD_vs_MOCK$padj<0.05),] dim(sig_DEHYD_vs_MOCK) head(sig_DEHYD_vs_MOCK) #Exploring DGE results dev.off() drawLines <- function() abline(h=c(-1,1),col="dodgerblue",lwd=2) plotMA(res_ABA_vs_MOCK); drawLines() plotMA(res_SALT_vs_MOCK); drawLines() plotMA(res_DEHYD_vs_MOCK); drawLines() #Let's plot the vulcano plots seen in class res_ABA_vs_MOCK_df<-as.data.frame(res_ABA_vs_MOCK) res_ABA_vs_MOCK_df$Sig<-'NotSig' res_ABA_vs_MOCK_df[which(res_ABA_vs_MOCK_df$padj < 0.05),'Sig']<-'Yes' ggplot(res_ABA_vs_MOCK_df,aes(x=log2FoldChange, y=log(padj,base=10)*-1, colour=Sig))+ theme_bw()+ geom_point()#+ # ylim(0,5)+ # xlim(-2,2) #Getting the TPM for the DE genes TPM_SALT_vs_MOCK<-Arabtpm[which(rownames(Arabtpm) %in% rownames(sig_SALT_vs_MOCK)), as.character(targets[which(targets$Condition %in% c('SALT','MOCK')), 'SampleName'])] head(Arabtpm) dim(Arabtpm) dim(TPM_SALT_vs_MOCK) head(TPM_SALT_vs_MOCK) TPM_ABA_vs_MOCK<-Arabtpm[which(rownames(Arabtpm) %in% rownames(sig_ABA_vs_MOCK)), as.character(targets[which(targets$Condition %in% c('ABA','MOCK')),'SampleName'])] TPM_DEHYD_vs_MOCK<-Arabtpm[which(rownames(Arabtpm) %in% rownames(sig_DEHYD_vs_MOCK)), as.character(targets[which(targets$Condition %in% c('DEHYD','MOCK')),'SampleName'])] annot_col= data.frame(Genotype=factor(targets$Genotype), Condition=factor(targets$Condition)) rownames(annot_col)<-targets$SampleName pheatmap(TPM_SALT_vs_MOCK, scale='row', annotation_col = annot_col) pheatmap(TPM_ABA_vs_MOCK, scale='row', annotation_col = annot_col) pheatmap(TPM_DEHYD_vs_MOCK, scale='row', annotation_col = annot_col) #CreATE A vENN DIAGRAM WITH degS #https://www.r-graph-gallery.com/14-venn-diagramm.html # Prepare a palette of 3 colors with R colorbrewer: library(RColorBrewer) myCol <- brewer.pal(3, "Pastel2") venn.diagram(x=list(rownames(TPM_SALT_vs_MOCK),rownames(TPM_ABA_vs_MOCK),rownames(TPM_DEHYD_vs_MOCK)), category.names = c("SALT_vs_MOCK" , "ABA_vs_MOCK " , "DEHYD_vs_MOCK"), filename='VennDiagramDEGsimple.png', imagetype = 'png', output=TRUE) venn.diagram(x=list(rownames(TPM_SALT_vs_MOCK),rownames(TPM_ABA_vs_MOCK),rownames(TPM_DEHYD_vs_MOCK)), category.names = c("SALT_vs_MOCK" , "ABA_vs_MOCK " , "DEHYD_vs_MOCK"), filename='VennDiagramDEG.png', imagetype = 'png', output=TRUE, # Circles lwd = 2, lty = 'blank', fill = myCol, # Set names cat.cex = 0.6, cat.fontface = "bold", cat.default.pos = "outer", cat.pos = c(-27, 27, 135), cat.dist = c(0.055, 0.055, 0.085), cat.fontfamily = "sans", rotation = 1) #Clustering expression profiles #let's get all the TPM for all the DEG a<-union(rownames(TPM_SALT_vs_MOCK),rownames(TPM_ABA_vs_MOCK)) b<-union(rownames(TPM_DEHYD_vs_MOCK),a) length(b) TPM_DEGs<-as.data.frame(Arabtpm[which(rownames(Arabtpm) %in% b),]) dim(TPM_DEGs) head(TPM_DEGs) targets TPM_DEG_means<-as.data.frame(matrix(ncol=4,nrow=nrow(TPM_DEGs),data = NA)) colnames(TPM_DEG_means)<-c('DEHYD','SALT','ABA','MOCK') TPM_DEG_means$DEHYD<-rowMeans(TPM_DEGs[,as.character(targets[which(targets$Condition=='DEHYD'),'SampleName'])]) TPM_DEG_means$SALT<-rowMeans(TPM_DEGs[,as.character(targets[which(targets$Condition=='SALT'),'SampleName'])]) TPM_DEG_means$ABA<-rowMeans(TPM_DEGs[,as.character(targets[which(targets$Condition=='ABA'),'SampleName'])]) TPM_DEG_means$MOCK<-rowMeans(TPM_DEGs[,as.character(targets[which(targets$Condition=='MOCK'),'SampleName'])]) rownames(TPM_DEG_means)<-rownames(TPM_DEGs) head(TPM_DEG_means) head(TPM_DEGs) scale(TPM_DEG_means) Zval<-function(x){ xmean<-mean(x) xsd<-sd(x) z<-(x-xmean)/xsd return(z) } ArabZval<-as.data.frame(t(apply(TPM_DEG_means, 1, Zval))) head(ArabZval) head(ArabZval) head(TPM_DEG_means) ArabClustering<-Mclust(ArabZval,G=35) #In real life we need to apply different technique to choose the proper number of clusters #Number of genes per cluster ArabClusters<-as.data.frame(ArabClustering$classification) head(ArabClusters) ArabClusters$Gene<-rownames(ArabClusters) colnames(ArabClusters)<-c('Cluster','Gene') table(ArabClusters$Cluster) #Plotting the expression profile of genes in a given cluster Cluster11<-ArabZval[which(rownames(ArabZval) %in% ArabClusters[which(ArabClusters$Cluster == 11),"Gene"]),] head(Cluster11) Cluster11<-as.data.frame(Cluster11) Cluster11$Gene<-rownames(Cluster11) head(Cluster11) Cluster11_melt<-melt(Cluster11,id.vars = 'Gene') head(Cluster11_melt) ggplot(Cluster11_melt, aes(y=value,x=variable,group=Gene))+ geom_line(colour='black')+ theme_bw()+ ylab('Relative expression value')+ xlab('Condition') ggplot(Cluster11_melt, aes(y=value,x=variable,fill=variable))+ geom_boxplot()+ theme_bw()+ ylab('Relative expression value')+ xlab('Condition')