diff --git a/source_code/trace_plots.R b/source_code/trace_plots.R new file mode 100644 index 0000000000000000000000000000000000000000..1aef827524e0eec43659c56e618e5b6a88e232b6 --- /dev/null +++ b/source_code/trace_plots.R @@ -0,0 +1,118 @@ +# edge_freq_thres is the edge weights quantile, which is the minimal edge weight accepted in the resulting network +#' @export +trace_plots <- function(mcmc_res, burn_in, thin, figures_dir, gene_annot, PK, OMICS_module_res, edge_weights = "MCMC_freq", edge_freq_thres = NULL, gene_ID, TFtargs) +{ + if(!(edge_weights %in% c("MCMC_freq","empB"))) + { + print('Warning: edge_weights argument must be either "MCMC_freq" or "empB".') + } + + if(!dir.exists(figures_dir)){dir.create(figures_dir)} + + # Trace plot of beta values + df1 <- data.frame(beta = mapply(mcmc_res$beta_tuning,FUN=function(x) x$value), k=1:length(mapply(mcmc_res$beta_tuning,FUN=function(x) x$value)), accept = 1) + + # RMS strength (threshold) + rms_strength <- abs(diff(mcmc_res$sampling.phase_res$rms)) + strength_threshold <- quantile(rms_strength, 0.75, na.rm = TRUE) + + # custom.strength estimates the strength of each arc as its empirical frequency over a set of networks: + cpdags1 <- unique(mcmc_res$sampling.phase_res$mcmc_sim_part_res$seed1$cpdags[(burn_in/thin+1):length(mcmc_res$sampling.phase_res$mcmc_sim_part_res$seed1$cpdags)]) + cpdags2 <- unique(mcmc_res$sampling.phase_res$mcmc_sim_part_res$seed2$cpdags[(burn_in/thin+1):length(mcmc_res$sampling.phase_res$mcmc_sim_part_res$seed2$cpdags)]) + + cpdag_weights1 <- custom.strength(cpdags1, nodes = bnlearn::nodes(cpdags1[[1]]), weights = NULL) + cpdag_weights2 <- custom.strength(cpdags2, nodes = bnlearn::nodes(cpdags2[[1]]), weights = NULL) + cpdag_weights1 <- cpdag_weights1[cpdag_weights1$direction>=0.5,] + cpdag_weights2 <- cpdag_weights2[cpdag_weights2$direction>=0.5,] + + cpdag_weights1$edge <- paste(cpdag_weights1$from, cpdag_weights1$to, sep="_") + cpdag_weights2$edge <- paste(cpdag_weights2$from, cpdag_weights2$to, sep="_") + cpdag_weights <- merge(cpdag_weights1, cpdag_weights2, by = "edge") + cpdag_weights$strength <- round(rowMeans(cbind(cpdag_weights$strength.x, cpdag_weights$strength.y)),2) + + if(!is.null(edge_freq_thres)) + { + strength_quant <- quantile(x = cpdag_weights$strength, probs = edge_freq_thres) + cpdag_weights <- cpdag_weights[cpdag_weights$strength >= strength_quant,] + } + + total <- merge(cpdag_weights1, cpdag_weights2, by = c("from","to")) + + svg(paste(figures_dir,"beta_values.svg",sep="/")) + plot(df1$beta ~ df1$k, type = "l", col= "darkblue", + main = "Beta values of adaptive MCMC", + xlab = "iteration", + ylab = "beta") + dev.off() + + svg(paste(figures_dir,"post_prob_edges.svg",sep="/")) + plot(total$strength.x ~ total$strength.y, + main = "Consistency of edges posterior probabilities", + xlab="MCMC run 2", + ylab = "MCMC run 1") + abline(0,1, col="orange") + dev.off() + + svg(paste(figures_dir,"convergence_RMS.svg",sep="/")) + plot(rms_strength, main="Convergence RMS strength (C.RMS.str)", pch = 18, col="gray30") + abline(h=strength_threshold, col="#E69F00", lwd = 1.5) + text(label = paste("3rd quartile of C.RMS.str = ", round(strength_threshold,3),sep=""), x = 100, y = strength_threshold+0.015, col="#E69F00") + dev.off() + + PK <- PK[PK$src_entrez %in% unlist(lapply(OMICS_module_res$omics,colnames)),] + PK <- PK[PK$dest_entrez %in% unlist(lapply(OMICS_module_res$omics,colnames)),] + + if(gene_ID=="entrezID") + { + edge_list <- matrix(data = c(cpdag_weights$from.x, + cpdag_weights$to.x, + cpdag_weights$strength, + rep(NA,length(cpdag_weights$strength)), + rep(NA,length(cpdag_weights$strength))), + nrow = length(cpdag_weights$strength), + dimnames = list(c(), c("from", "to", "weight", "edge_type", "edge"))) + # needs to be sorted because of colors in the final figure + node_list <- unique(c(edge_list[,"from"], edge_list[,"to"])) + edge_list[,"edge"] <- paste(edge_list[,"from"], edge_list[,"to"], sep="_") + return_list <- edge_types(mcmc_res = mcmc_res, PK = PK, gene_annot = gene_annot, edge_list = edge_list, node_list = node_list, OMICS_module_res = OMICS_module_res, edge_weights = edge_weights, TFtargs = TFtargs) + } else if (gene_ID=="gene_symbol") { + from <- as.character(gene_annot$gene_symbol[match(cpdag_weights$from.x, gene_annot$entrezID)]) + from[is.na(from)] <- cpdag_weights$from.x[is.na(from)] + from[regexpr("entrezid",from)>0] <- tolower(as.character(gene_annot$gene_symbol[match(toupper(from[regexpr("entrezid",from)>0]), gene_annot$entrezID)])) + to <- as.character(gene_annot$gene_symbol[match(cpdag_weights$to.x, gene_annot$entrezID)]) + edge_list <- matrix(data = c(from, + to, + cpdag_weights$strength, + rep(NA,length(cpdag_weights$strength)), + rep(NA,length(cpdag_weights$strength))), + nrow = length(cpdag_weights$strength), + dimnames = list(c(), c("from", "to", "weight", "edge_type", "edge"))) + # needs to be sorted because of colors in the final figure + node_list <- unique(c(edge_list[,"from"], edge_list[,"to"])) + edge_list[,"edge"] <- paste(edge_list[,"from"], edge_list[,"to"], sep="_") + return_list <- edge_types(mcmc_res = mcmc_res, PK = PK, gene_annot = gene_annot, edge_list = edge_list, node_list = node_list, OMICS_module_res = OMICS_module_res, edge_weights = edge_weights, TFtargs = TFtargs) + } else { + print('gene_ID argument must be either "entrezID" or "gene_symbol"') + } # end if else if else (gene_ID=="entrezID") + + net_weighted <- graph_from_edgelist(return_list$edge_list[,c("from","to")]) + V(net_weighted)$color <- return_list$node_list[match(as_ids(V(net_weighted)),return_list$node_list[,"label"]),"color"] + palette <- return_list$node_palette + names(palette) <- 1:length(palette) + palette <- palette[unique(V(net_weighted)$color)] + V(net_weighted)$label <- return_list$node_list[match(as_ids(V(net_weighted)),return_list$node_list[,"label"]),"label"] + E(net_weighted)$edge <- return_list$edge_list[match(sub("|","_",as_ids(E(net_weighted)), fixed = TRUE),return_list$edge_list[,"edge"]),"edge_type"] + E(net_weighted)$weight <- return_list$edge_list[match(sub("|","_",as_ids(E(net_weighted)), fixed = TRUE),return_list$edge_list[,"edge"]),"weight"] + + # arrow in the network plot + V(net_weighted)$degree <- igraph::degree(net_weighted, mode = "in") + V(net_weighted)$degree <- normalise(V(net_weighted)$degree, to = c(3, 11)) + + return(list(edge_list = return_list$edge_list, + node_list = return_list$node_list, + borders_GE = return_list$borders_GE, + borders_CNV = return_list$borders_CNV, + borders_METH = return_list$borders_METH, + node_palette = palette, + net_weighted = net_weighted)) +}