Skip to content
Snippets Groups Projects
Commit 8c292769 authored by Eva Budinská's avatar Eva Budinská
Browse files

Upload New File

parent 3450d400
No related branches found
No related tags found
No related merge requests found
# setting up the workspace
#pathtodata<-"/run/user/1000/gvfs/smb-share:server=147.251.32.123,share=bouchal_budinska/Porovnani proteomiky a transkriptomiky_COMT_TNBC/"
pathtodata<-"R:/Porovnani proteomiky a transkriptomiky_COMT_TNBC/"
setwd(pathtodata)
#loading the data
E<-read.csv("data/norm_counts.tsv", sep="\t", header=T) # DESeq2 normalized expression data
P<-read.csv("data/20210702_110638_COMT_MDAMB231CRISPR_4349_2101_ProteinLevelReport.csv", sep="\t", header=T, dec = ".")
#reading protein annotation
protan<-read.csv("data/Protein_annotation_COMT_TNBC.csv", header=T,sep="\t")
prot_names<-protan$Genes
names(prot_names)<-protan$ProteinGroups
library(reshape)
P$ID<-paste(P$R.Condition,P$R.Replicate, sep="_")
P_wide<-reshape(P[,-c(1,2,3)], direction="wide", idvar="PG.ProteinAccessions", timevar="ID", )
colnames(P_wide)<-gsub("_","_rep",gsub("ctrl","CTRL",gsub(pattern = "PG.Quantity.", replacement = "",colnames(P_wide))))
rownames(P_wide)<-P_wide[,1]
P_wide<-P_wide[,-1]
head(P_wide)
match(colnames(P_wide),colnames(E))
## selecting common samples
## means selecting samples with all data types (practically in this case removing F11 and third replicates)
sample_sel<-intersect(colnames(P_wide),colnames(E))[which(regexpr("F11",intersect(colnames(P_wide),colnames(E)))==-1)]
E_sel<-E[,sample_sel]
P_sel<-P_wide[,sample_sel]
## preparing gene names for nicer visualization if necessary
gene_names<-E$gene_name
names(gene_names)<-E$gene_id
### starting MixOmics analysis!!! ###
library(mixOmics)
## preparing list of data matrices for mixOmics
diabloset<-list(P=t(P_sel), E=t(E_sel)) # rows are samples, hence we are transposing the matrices
## filtering parameters - it is wise to filter out variables that are not the most informative. Say based on coefficient of variation.
# let's calculate the coefficient of variation for each parameter
CV_P<-apply(diabloset$P,2, function(x) mean(x)/sd(x))
CV_E<-apply(diabloset$E,2, function(x) mean(x)/sd(x))
plot(sort(CV_P))
plot(sort(CV_E))
sel_CV_P<-which(CV_P>15)
sel_CV_E<-which(CV_E>10)
length(sel_CV_P)
length(sel_CV_E)
diabloset_filt<-list(P=t(P_sel)[,sel_CV_P], E=t(E_sel)[,sel_CV_E])
group<-c("CTRL","COMT_neg")[c(regexpr("CTRL",rownames(diabloset_filt$E))==-1)+1]
names(group)<-rownames(diabloset_filt$E)
## performing block.splsda ##
design <- matrix(0.1, ncol = length(diabloset_filt), nrow = length(diabloset_filt),
dimnames = list(names(diabloset_filt), names(diabloset_filt)))
diag(design) = 0
Y <- group
sgccda10 <- block.splsda(X = diabloset_filt,
Y = Y,
ncomp = 10,
design = design,
scheme = "centroid", scale = TRUE)
variance<-unlist(lapply(sgccda10$prop_expl_var,function(x) x[[1]])[-7])
pdf("mixOmics/variance_explained_vs_weight_comp1.pdf")
barplot(sgccda10$weights$comp1, variance[-3], xlab="variance explained by 1st component",ylab="weight in the model", names.arg = names(variance)[-3], las=2)
dev.off()
pdf("mixOmics/cimDiablo_Heatmapa.pdf", width = 28)
cimDiablo(sgccda10, margins = c(20,10), size.legend = 0.5)
dev.off()
cols<-c("red","blue")[as.numeric(as.factor(group))]
pdf("mixOmics/sgccda.pdf")
plot(sgccda10, col=c("red","blue"))
dev.off()
pdf("mixOmics/sgccda_indiv.pdf")
plotIndiv(sgccda10, col=c("red","blue"))
dev.off()
pdf("mixOmics/loadings.pdf", height = 42, width=14)
plotLoadings(sgccda10, contrib = "max")
dev.off()
plot(sort(abs(sgccda10$loadings$P[,1])))
plot(sort(abs(sgccda10$loadings$E[,1])))
#selecting top 50 loadings from each
cuttedloadings_top50<-Reduce(union,lapply(loadings(sgccda10), function(x) names(sort(abs(x[,1]), decreasing=TRUE)[1:50]))[-3])
cuttedloadings_top50_list<-lapply(loadings(sgccda10), function(x) names(sort(abs(x[,1]), decreasing=TRUE)[1:50]))[-3]
## reanalyzing block.splsda on filtered top 50 loadings just for circosPlot
diabloset_filt_top50<-list(P=diabloset_filt$P[,cuttedloadings_top50_list$P],E=diabloset_filt$E[,cuttedloadings_top50_list$E])
varnams<-cuttedloadings_top50_list
varnams$E<-gene_names[cuttedloadings_top50_list$E]
sgccda_top50 <- block.splsda(X = diabloset_filt_top50,
Y = Y,
ncomp = 10,
design = design,
scheme = "centroid", scale = TRUE)
pdf("mixOmics/circosPlot_top50_0.95.pdf")
circosPlot(sgccda_top50,comp = 1:2, cutoff = 0.95, var.names = varnams)
dev.off()
plot(diabloset_filt$P[,"Q99460"],diabloset_filt$E[,"ENSG00000170734"])
# heatmapa na top 50
pdf("mixOmics/cimDiablo_top50.pdf")
cimDiablo(sgccda_top50)
dev.off()
#### correlations ####
col2 <- colorRampPalette(c("#67001F", "#B2182B", "#D6604D", "#F4A582",
"#FDDBC7", "#FFFFFF", "#D1E5F0", "#92C5DE",
"#4393C3", "#2166AC", "#053061"))
col3 <- colorRampPalette(c("#67001F", "#B2182B", "#D6604D", "#F4A582",
"#FDDBC7", "light grey", "#D1E5F0", "#92C5DE",
"#4393C3", "#2166AC", "#053061"))
library(psych)
B<-data.frame(diabloset_filt$P,diabloset_filt$E)
corPW<-cor(B,method="pearson")
corPW.test<-corr.test(B,method="pearson")
corPW.test$p.adjusted<- corPW.test$p
diag(corPW.test$p.adjusted)<-NA
corPW.test$p.adjusted[upper.tri(corPW.test$p.adjusted)]<-p.adjust(corPW.test$p[upper.tri(corPW.test$p)], method="BH")
corPW.test$p.adjusted[lower.tri(corPW.test$p.adjusted)]<-p.adjust(corPW.test$p[lower.tri(corPW.test$p)], method="BH")
## rows genes, columns proteins ##
corPW_sel<-corPW[1:ncol(diabloset_filt$P),(ncol(diabloset_filt$P)+1):ncol(corPW)]
corPW.test_sel<-corPW.test[1:ncol(diabloset_filt$P),(ncol(diabloset_filt$P)+1):ncol(corPW)]
## rows genes, columns proteins ##
corPW_sel<-corPW[1:ncol(diabloset_filt$P),(ncol(diabloset_filt$P)+1):ncol(corPW)]
corPW.test_sel<-corPW.test[1:ncol(diabloset_filt$P),(ncol(diabloset_filt$P)+1):ncol(corPW)]
selC<-names(which(apply(corPW.test_sel,2,function(x) sum(any(x<0.05)))>0))
selR<-names(which(apply(corPW.test_sel,1,function(x) sum(any(x<0.05)))>0))
corPW_sel_top50<-corPW_sel[cuttedloadings_top50_list$P,cuttedloadings_top50_list$E]
colnames(corPW_sel_top50)<-gene_names[colnames(corPW_sel_top50)]
rownames(corPW_sel_top50)<-prot_names[rownames(corPW_sel_top50)]
hclust_proteins<-hclust(dist(corPW_sel_top50))
hclust_transcripts<-hclust(dist(t(corPW_sel_top50)))
pdf("mixOmics/correlations_pearson_top50.pdf")
corrplot::corrplot(corPW_sel_top50[hclust_proteins$labels[hclust_proteins$order],hclust_transcripts$labels[hclust_transcripts$order]], is.corr = FALSE, col = col2(100)[100:1])
dev.off()
# merging with DEG table
deg_ctrl_b1<-read.csv("DE_analysis/CTRL_vs_B1/all/DESeq2_CTRLvsB1.tsv.csv", header=T,sep = "\t", row.names = 1)
deg_ctrl_b1[cuttedloadings_top50_list$E,]
t.test(diabloset$E[,"ENSG00000124181"]~group[rownames(diabloset$E)])
boxplot(diabloset$E[,"ENSG00000124181"]~group[rownames(diabloset$E)])
deg_ctrl_b1["ENSG00000124181",]$log2FoldChange
t.test(diabloset$E[,"ENSG00000124181"]~group[rownames(diabloset$E)])
apply(log(diabloset$E[1:4,rownames(deg_ctrl_b1[c(14,15583),])]),2, function(x) t.test(x~group[rownames(diabloset$E)][1:4]))
boxplot(log(diabloset$E[,"ENSG00000124181"])~group[rownames(diabloset$E)],ylim=c(0,10))
boxplot(log(diabloset$E[,"ENSG00000196611"])~group[rownames(diabloset$E)],ylim=c(0,10))
boxplot(log(diabloset$E[,"ENSG00000103888"])~group[rownames(diabloset$E)],ylim=c(0,10))
deg_ctrl_b1[c("ENSG00000124181","ENSG00000196611","ENSG00000103888"),]
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment