ipsc_mn_als_datasets.als.labels <- ipsc_mn_als_datasets.res %>% filter(padj < 0.05, gene_name %in% c(ALS_genes, RNA_BINDING_PROTEINS, p53.signalling.pathway, gprofiler_database$`DNA Damage Response`)) %>% distinct(gene_name) %>% pull(gene_name)
ipsc_mn_als_datasets.gene.labels <- ipsc_mn_als_datasets.res %>% filter(!grepl("^AC|^AL", gene_name)) %>% top_n(20, wt = -pvalue) %>% pull(gene_name)
ipsc_mn_als_datasets.volcano <- volcano_plot(ipsc_mn_als_datasets.res, numerator = "ALS", denomenator = "CTRL",
subtitle = "106 Controls vs 323 ALS", ymax = 8, xmax = 2, dpi = 72, gene_labels = c(ipsc_mn_als_datasets.als.labels, ipsc_mn_als_datasets.gene.labels))
ipsc_mn_als_datasets.volcano
ipsc_mn_als_datasets.gost <- ipsc_mn_als_datasets.res %>% filter(padj < 0.05) %>% group_split(log2FoldChange > 0) %>% map("gene_id") %>% gost()
ipsc_mn_als_datasets.gprofiles <- ipsc_mn_als_datasets.gost$result %>% filter(!str_detect(source, "TF|CORUM")) %>% arrange( -log10(p_value) ) %>% mutate(query = case_when(query == "query_1" ~ " ALS Down ", query == "query_2" ~ " ALS Up "), term_name = as.factor(term_name))
ipsc_mn_als_datasets.gprofiles_down <- ipsc_mn_als_datasets.gprofiles %>% filter(query == " ALS Down ")
ipsc_mn_als_datasets.gprofiles_up <- ipsc_mn_als_datasets.gprofiles %>% filter(query == " ALS Up ")
# paste('"',unique(ipsc_mn_als_datasets.gprofiles_down$term_name),'",', sep = "", collapse = '\n') %>% cat()
down_list <- c(
"neuron fate commitment",
"cell differentiation in spinal cord",
"DNA-binding transcription factor activity, RNA polymerase II-specific",
"ventral spinal cord development",
"cis-regulatory region sequence-specific DNA binding"
)
ipsc_mn_als_datasets.gprofiles_filt_down <- ipsc_mn_als_datasets.gprofiles %>% filter(query == " ALS Down ") %>% filter(term_name %in% down_list)
# paste('"',unique(ipsc_mn_als_datasets.gprofiles_up$term_name),'",', sep = "", collapse = '\n') %>% cat()
up_list <- c(
"p53 transcriptional gene network",
"Genotoxicity pathway",
"miRNA regulation of DNA damage response",
"DNA damage response",
"p53 signaling pathway")
ipsc_mn_als_datasets.gprofiles_filt_up <- ipsc_mn_als_datasets.gprofiles %>% filter(query == " ALS Up ") %>% filter(term_name %in% up_list)
ipsc_mn_als_datasets.gprofiles_filt_combined <- bind_rows(ipsc_mn_als_datasets.gprofiles_filt_down, ipsc_mn_als_datasets.gprofiles_filt_up) %>% mutate(query = factor(query, levels = c(" ALS Up ", " ALS Down ")))
ipsc_mn_als_datasets.gprofiles_filt_combined = ipsc_mn_als_datasets.gprofiles_filt_combined %>% mutate(term_name = gsub("miRNA regulation of DNA damage response", "miRNA regulation of DNA damage", term_name), term_name = gsub("and","&",term_name), term_name = gsub("DNA-binding transcription factor activity, RNA polymerase II-specific","transcription factor activity",term_name), term_name = gsub("cis-regulatory region sequence-specific DNA binding","DNA binding",term_name), term_name = gsub("cell ","",term_name))
ipsc_mn_als_datasets.go_curated <- curated_profiles(gprofiles = ipsc_mn_als_datasets.gprofiles_filt_combined)
ipsc_mn_als_datasets.go_curated
ipsc_mn_als_datasets.geneList <- ipsc_mn_als_datasets.res %>% drop_na(stat) %>% pull(stat)
names(ipsc_mn_als_datasets.geneList) <- ipsc_mn_als_datasets.res %>% drop_na(stat) %>% pull(gene_name) %>% as.character()
ipsc_mn_als_datasets.geneList = sort(ipsc_mn_als_datasets.geneList, decreasing = TRUE)
# p53 signalling pathway
p53.signalling_genes.list <- list("p53.signalling" = ipsc_mn_als_datasets.res$gene_name[ipsc_mn_als_datasets.res$gene_name %in% go.pathways.msigdb$GO_SIGNAL_TRANSDUCTION_BY_P53_CLASS_MEDIATOR])
ipsc_mn_als_datasets.p53.signalling_fgseaRes <- fgsea(pathways = p53.signalling_genes.list, stats = ipsc_mn_als_datasets.geneList)
ipsc_mn_als_datasets.p53.signalling.gseaplot = plotEnrichment(p53.signalling_genes.list[["p53.signalling"]], ipsc_mn_als_datasets.geneList) +
theme(axis.text=element_text(size=8), axis.title=element_text(size=10)) + ggtitle("p53 signalling") + xlab("") +
annotate("text", x = 35000, y = 0.3, label = paste0("NES = ", round(ipsc_mn_als_datasets.p53.signalling_fgseaRes$NES, digits = 3),"\nP = ", round(ipsc_mn_als_datasets.p53.signalling_fgseaRes$pval, digits = 6)), size = 2.8, hjust = 0) + theme_oz()
ipsc_mn_als_datasets.p53.signalling.gseaplot
https://www.bioconductor.org/packages/release/bioc/vignettes/decoupleR/inst/doc/pw_bk.html https://github.com/saezlab/transcriptutorial/blob/master/scripts/03_Pathway_activity_with_Progeny.md https://saezlab.github.io/progeny/
Pathway RespOnsive GENes (PROGENy) estimates signaling pathway activity
net_progeny <- get_progeny(organism = 'human', top = 100)
ipsc_mn_als_datasets.res.matrix <- ipsc_mn_als_datasets.res %>% select(gene_name, stat) %>% drop_na(stat) %>% distinct(gene_name, .keep_all = TRUE) %>% tibble_to_matrix(stat, row_names = "gene_name")
ipsc_mn_als_progeny.contrast_acts <- run_wmean(mat=ipsc_mn_als_datasets.res.matrix, net=net_progeny, .source='source', .target='target', .mor='weight', times = 1000, minsize = 5) %>% filter(statistic == 'norm_wmean') %>% mutate(p.signif = case_when(p_value < 0.003 ~ "***", p_value < 0.009 ~ "***",p_value < 0.01 ~ "**",p_value < 0.05 ~ "*", TRUE ~ ""), group1 = source, group2 = source)
ipsc_mn_als_progeny.pathwayActivity <- ggplot(ipsc_mn_als_progeny.contrast_acts, aes(x=reorder(source, score), y = score)) +
geom_bar(aes(fill = score), stat = "identity") +
scale_fill_gradient2(low = "darkblue", high = "indianred", mid = "whitesmoke", midpoint = 0) +
theme_oz() + theme(axis.text.x = element_text(angle = 90), legend.position = "none", plot.title = element_text(hjust = 0.5)) +
labs(title = "Signaling Pathways", x = "", y = "ALS vs CTRL Normalised enrichment") +
stat_pvalue_manual(ipsc_mn_als_progeny.contrast_acts, label = "p.signif", y.position = 14, xmin = "source", xmax = NULL, size = 3, hide.ns = TRUE)
ipsc_mn_als_progeny.pathwayActivity
p53 weights
ipsc_mn_als_progeny.p53weights = net_progeny %>% filter(source == "p53", target %in% ipsc_mn_als_datasets.res$gene_name) %>% left_join(select(ipsc_mn_als_datasets.res, target=gene_name,stat)) %>%
mutate(color = case_when(weight > 0 & stat > 0 ~ '1', weight > 0 & stat < 0 ~ '2', weight < 0 & stat > 0 ~ '2', weight < 0 & stat < 0 ~ '1', TRUE ~ "3"))
ipsc_mn_als_progeny.p53weights.plot = ggplot(ipsc_mn_als_progeny.p53weights, aes(x = weight, y = stat, color = color)) + geom_point(size = 0.5) +
scale_colour_manual(values = c("red","royalblue3","grey")) +
geom_text_repel(fontface = "italic", data = filter(ipsc_mn_als_progeny.p53weights, weight > 5, stat > 3), aes(label = target), max.overlaps = Inf, min.segment.length = unit(0, "lines"), size = 2.1) +
theme_oz() + theme(legend.position = "none", plot.title = element_text(hjust = 0.5)) +
labs(title = "p53 pathway weights", x = "p53 pathway gene weight", y = "ALS vs CTRL test statistic") +
geom_vline(xintercept = 0, linetype = 'dotted') + geom_hline(yintercept = 0, linetype = 'dotted') +
coord_cartesian(xlim = c(-8,18), ylim = c(-2.5,5))
ipsc_mn_als_progeny.p53weights.plot
https://www.bioconductor.org/packages/release/bioc/vignettes/decoupleR/inst/doc/tf_bk.html https://github.com/saezlab/transcriptutorial/blob/master/scripts/04_TranscriptionFactor_activity_with_Dorothea.md
net_dorothea <- get_dorothea(organism='human', levels=c('A', 'B', 'C'))
ipsc_mn_als_dorothea.contrast_acts <- run_wmean(mat=ipsc_mn_als_datasets.res.matrix, net=net_dorothea, .source='source', .target='target', .mor='mor', times = 1000, minsize = 5) %>% filter(statistic == 'norm_wmean') %>%
mutate(p.signif = case_when(p_value < 0.05 ~ "*", TRUE ~ ""), sig = case_when(p_value < 0.135 & abs(score) < 4 ~ "sig", p_value < 0.135 & abs(score) >= 4 ~ "sig_strong", p_value >= 0.135 ~ "non_sig"), direction = ifelse(score > 0, "up", "down"), class = paste(sig, direction))
ipsc_mn_als_dorothea.contrast_acts.labels.up = ipsc_mn_als_dorothea.contrast_acts %>% top_n(score, n = 3) %>% pull(source)
ipsc_mn_als_dorothea.contrast_acts.labels.down = ipsc_mn_als_dorothea.contrast_acts %>% top_n(-score, n = 3) %>% pull(source)
# Volcano of TFs score vs p_value
ipsc_mn_als_dorothea.contrast_acts.volcano = ggplot(ipsc_mn_als_dorothea.contrast_acts, aes(x = score, y = -log10(p_value))) + geom_point(aes(colour = class)) +
scale_colour_manual(values = c("non_sig up" = "gray", "non_sig down" = "gray", "sig up" = "#F36F6F", "sig_strong up" = "#EB4445", "sig down" = "#A6CEE3", "sig_strong down" = "#79B1D3")) +
labs(y = expression(-log[10]~P~value), x = "Normalised enrichment ALS vs CTRL", title = "Transcription Factors") +
guides(colour = "none") +
theme_oz() + theme(plot.title = element_text(hjust = 0.5), plot.subtitle = element_text(hjust = 0.5), panel.border = element_blank(), axis.ticks = element_line(colour = "black") ) +
geom_text_repel(fontface = "italic", data = filter(ipsc_mn_als_dorothea.contrast_acts, source %in% c(ipsc_mn_als_dorothea.contrast_acts.labels.up, ipsc_mn_als_dorothea.contrast_acts.labels.down)), aes(label = source), max.overlaps = Inf, min.segment.length = unit(0, "lines"), size = 2.5) +
geom_vline(xintercept = 0, linetype = 'dotted') + ylim(c(0,2.8))
ipsc_mn_als_dorothea.contrast_acts.volcano
TP53 target genes
ipsc_mn_als_dorothea.tp53 <- net_dorothea %>% filter(source == "TP53") %>% mutate(gene_name = target) %>% left_join(select(ipsc_mn_als_datasets.res, gene_name,stat,log2FoldChange,pvalue)) %>%
mutate(color = case_when(mor > 0 & stat > 0 ~ '1', mor > 0 & stat < 0~'2',mor < 0 & stat > 0~'2',mor < 0 & stat < 0~'1',TRUE ~ "3"))
ipsc_mn_als_dorothea.tp53.volcano = ggplot(ipsc_mn_als_dorothea.tp53, aes(x = log2FoldChange, y = -log10(pvalue), color = color, size=abs(mor))) + geom_point() + scale_size(range = c(0, 3)) +
scale_colour_manual(values = c("red","royalblue3","grey")) +
geom_text_repel(data = filter(ipsc_mn_als_dorothea.tp53, abs(stat) > 3), aes(label = gene_name, size=1), fontface = "italic", max.overlaps = Inf, size = 2.3) +
theme_oz() + theme(legend.position = "none", plot.title = element_text(hjust = 0.5)) + geom_vline(xintercept = 0, linetype = 'dotted') + geom_hline(yintercept = 0, linetype = 'dotted') +
labs(title = "TP53 regulon", x = "log2 fold change (ALS vs CTRL)", y = expression(-log[10]~P~value)) + ylim(c(0,2)) +
geom_text_repel(fontface = "italic", data = filter(ipsc_mn_als_dorothea.tp53, log2FoldChange > 0.25, -log10(pvalue) > 1.0), aes(label = target), max.overlaps = Inf, min.segment.length = unit(0, "lines"), size = 2.5)
ipsc_mn_als_dorothea.tp53.volcano
ipsc_mn_als_datasets.res.sig = ipsc_mn_als_datasets.res %>% filter(padj < 0.05) %>% select(gene_name, everything())
kbl(ipsc_mn_als_datasets.res.sig) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), fixed_thead = T) %>% scroll_box(width = "100%", height = "500px")
gene_name | gene_id | baseMean | log2FoldChange | lfcSE | stat | pvalue | padj |
---|---|---|---|---|---|---|---|
MTCO1P12 | ENSG00000237973 | 572.701897 | -1.6010349 | 0.1962405 | -8.158533 | 0.00e+00 | 0.0000000 |
ADGRG7 | ENSG00000144820 | 8.018303 | 3.3092857 | 0.5420888 | 6.104693 | 0.00e+00 | 0.0000141 |
RNASEL | ENSG00000135828 | 149.633190 | 0.5962462 | 0.1171485 | 5.089660 | 4.00e-07 | 0.0032664 |
CHRND | ENSG00000135902 | 6.225069 | -0.6088888 | 0.1269986 | -4.794454 | 1.60e-06 | 0.0077084 |
TBX5 | ENSG00000089225 | 3.673370 | -3.1032906 | 0.6499186 | -4.774891 | 1.80e-06 | 0.0077084 |
ZNF676 | ENSG00000196109 | 3.733748 | 3.3438001 | 0.7011752 | 4.768851 | 1.90e-06 | 0.0077084 |
AC025171.1 | ENSG00000177738 | 46.076146 | -0.3937060 | 0.0827819 | -4.755944 | 2.00e-06 | 0.0077084 |
TMSB15B | ENSG00000158427 | 653.227425 | -0.2384203 | 0.0510804 | -4.667548 | 3.00e-06 | 0.0086407 |
PTCHD4 | ENSG00000244694 | 834.105420 | 0.3292627 | 0.0706018 | 4.663660 | 3.10e-06 | 0.0086407 |
NDRG2 | ENSG00000165795 | 581.833220 | -0.1970758 | 0.0422915 | -4.659941 | 3.20e-06 | 0.0086407 |
MTCO2P12 | ENSG00000229344 | 7.853073 | -1.4451766 | 0.3127088 | -4.621477 | 3.80e-06 | 0.0092457 |
MYOG | ENSG00000122180 | 4.184301 | -0.9060086 | 0.1967146 | -4.605702 | 4.10e-06 | 0.0092457 |
TCEAL6 | ENSG00000204071 | 57.699706 | 0.6145108 | 0.1338354 | 4.591542 | 4.40e-06 | 0.0092457 |
TNFRSF10B | ENSG00000120889 | 5046.827884 | 0.2082392 | 0.0461122 | 4.515926 | 6.30e-06 | 0.0116535 |
THSD1 | ENSG00000136114 | 99.370506 | 0.2534239 | 0.0561581 | 4.512689 | 6.40e-06 | 0.0116535 |
AC120036.4 | ENSG00000255366 | 162.220351 | 0.4677634 | 0.1040612 | 4.495078 | 7.00e-06 | 0.0116535 |
POU5F1 | ENSG00000204531 | 62.072375 | -1.8657621 | 0.4158933 | -4.486155 | 7.30e-06 | 0.0116535 |
ISCU | ENSG00000136003 | 1487.773856 | 0.1030001 | 0.0230432 | 4.469863 | 7.80e-06 | 0.0118787 |
TP53TG3E | ENSG00000275034 | 2.542112 | 7.1535266 | 1.6299732 | 4.388739 | 1.14e-05 | 0.0163788 |
SESN1 | ENSG00000080546 | 1249.025842 | 0.1711968 | 0.0391061 | 4.377748 | 1.20e-05 | 0.0163788 |
LMO4 | ENSG00000143013 | 1893.406368 | -0.1959024 | 0.0450082 | -4.352595 | 1.35e-05 | 0.0167379 |
RRM2B | ENSG00000048392 | 1858.481757 | 0.2020891 | 0.0464341 | 4.352172 | 1.35e-05 | 0.0167379 |
SOD2-OT1 | ENSG00000285427 | 40.135367 | -0.2587170 | 0.0596421 | -4.337828 | 1.44e-05 | 0.0170913 |
DACH2 | ENSG00000126733 | 214.728551 | -0.4718518 | 0.1098737 | -4.294493 | 1.75e-05 | 0.0198326 |
CDKN1A | ENSG00000124762 | 2557.463659 | 0.3542375 | 0.0827034 | 4.283226 | 1.84e-05 | 0.0198326 |
PCDH12 | ENSG00000113555 | 14.374543 | 0.7988895 | 0.1867528 | 4.277790 | 1.89e-05 | 0.0198326 |
FOXN4 | ENSG00000139445 | 41.735793 | -0.8249476 | 0.1938962 | -4.254583 | 2.09e-05 | 0.0211906 |
CROCC2 | ENSG00000226321 | 8.794508 | -1.5582071 | 0.3704684 | -4.206046 | 2.60e-05 | 0.0253547 |
PDE8B | ENSG00000113231 | 185.056712 | -0.4275982 | 0.1025237 | -4.170726 | 3.04e-05 | 0.0286020 |
OLIG2 | ENSG00000205927 | 193.846024 | -0.7014318 | 0.1688481 | -4.154218 | 3.26e-05 | 0.0297222 |
AC026367.3 | ENSG00000275759 | 9.932144 | 0.4831218 | 0.1172603 | 4.120079 | 3.79e-05 | 0.0328980 |
MIR34AHG | ENSG00000228526 | 339.051491 | 0.3735483 | 0.0908080 | 4.113608 | 3.90e-05 | 0.0328980 |
PLIN5 | ENSG00000214456 | 30.874450 | -0.4650748 | 0.1132377 | -4.107068 | 4.01e-05 | 0.0328980 |
OLIG1 | ENSG00000184221 | 98.491406 | -0.7260811 | 0.1770438 | -4.101139 | 4.11e-05 | 0.0328980 |
RPL23AP2 | ENSG00000225067 | 4.347676 | 1.4318105 | 0.3496167 | 4.095372 | 4.21e-05 | 0.0328980 |
AACSP1 | ENSG00000250420 | 62.083860 | -1.0099581 | 0.2490903 | -4.054587 | 5.02e-05 | 0.0371376 |
ABL2 | ENSG00000143322 | 2631.767326 | 0.1245984 | 0.0307329 | 4.054229 | 5.03e-05 | 0.0371376 |
AL450405.1 | ENSG00000230202 | 58.424878 | 0.8107089 | 0.2005597 | 4.042233 | 5.29e-05 | 0.0380616 |
PAQR4 | ENSG00000162073 | 878.096823 | -0.1423603 | 0.0353904 | -4.022565 | 5.76e-05 | 0.0397368 |
RPL29P11 | ENSG00000224858 | 9.135389 | -0.9411368 | 0.2341103 | -4.020057 | 5.82e-05 | 0.0397368 |
TNNC2 | ENSG00000101470 | 6.241813 | -0.5157090 | 0.1286956 | -4.007200 | 6.14e-05 | 0.0402053 |
OPCML-IT1 | ENSG00000254896 | 21.400539 | -0.4982370 | 0.1243796 | -4.005778 | 6.18e-05 | 0.0402053 |
FBXO22 | ENSG00000167196 | 2179.343449 | 0.1144354 | 0.0288417 | 3.967703 | 7.26e-05 | 0.0461030 |
ipsc_mn_als_dorothea.contrast_acts <- run_wmean(mat=ipsc_mn_als_datasets.res.matrix, net=net_dorothea, .source='source', .target='target', .mor='mor', times = 100, minsize = 5) %>% filter(statistic == 'norm_wmean') %>% select(-statistic, -condition, transcription_factor = source, normalised_enrichment_score = score) %>% arrange(p_value, -abs(normalised_enrichment_score)) %>% top_n(n = 20, wt = abs(normalised_enrichment_score))
kbl(ipsc_mn_als_dorothea.contrast_acts) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), fixed_thead = T) %>% scroll_box(width = "100%", height = "500px")
transcription_factor | normalised_enrichment_score | p_value |
---|---|---|
TP53 | 7.888387 | 0.02 |
PRDM14 | -7.249614 | 0.02 |
ZNF274 | 6.598009 | 0.02 |
ZBTB33 | 6.299956 | 0.02 |
ATF4 | 6.233782 | 0.02 |
SOX2 | -5.917799 | 0.02 |
TFAP2C | -5.878825 | 0.02 |
ZNF263 | -5.696282 | 0.02 |
ONECUT1 | -5.518476 | 0.02 |
SIX5 | -5.254948 | 0.02 |
ZBTB11 | 5.042477 | 0.02 |
CTCF | -4.874786 | 0.02 |
ZNF639 | 4.751471 | 0.02 |
SNAPC4 | -4.650668 | 0.02 |
GLI2 | -4.376095 | 0.02 |
SIX2 | -4.344603 | 0.02 |
HMBOX1 | 4.132908 | 0.02 |
ESR1 | -4.132246 | 0.02 |
IRF3 | -3.943642 | 0.02 |
STAT1 | 3.941067 | 0.10 |
ipsc_mn_als_datasets.geneList <- ipsc_mn_als_datasets.res %>% drop_na(stat) %>% pull(stat)
names(ipsc_mn_als_datasets.geneList) <- ipsc_mn_als_datasets.res %>% drop_na(stat) %>% pull(gene_name) %>% as.character()
ipsc_mn_als_datasets.geneList = sort(ipsc_mn_als_datasets.geneList, decreasing = TRUE)
gprofiler_database.DNA_damage_pathways = gprofiler_database[grepl('DNA damage|p53', names(gprofiler_database))]
gprofiler_database.DNA_damage_pathways_fgseaRes <- fgsea(pathways = gprofiler_database.DNA_damage_pathways, stats = ipsc_mn_als_datasets.geneList)
gprofiler_database.DNA_damage.filt = filter(gprofiler_database.DNA_damage_pathways_fgseaRes, size >7, !grepl("uncertain|negative|prostate",pathway)) %>% mutate(p.signif = case_when(pval < 0.003 ~ "***", pval < 0.009 ~ "***",pval < 0.01 ~ "**",pval < 0.05 ~ "*", TRUE ~ ""), group1 = "pathway", group2 = "pathway")
gprofiler_database.DNA_damage_pathways_fgseaRes.barplot <- ggplot(gprofiler_database.DNA_damage.filt, aes(x=reorder(pathway, NES), y = NES)) +
geom_bar(aes(fill = NES), stat = "identity") +
scale_fill_gradient2(low = "darkblue", high = "indianred", mid = "whitesmoke", midpoint = 0) +
theme_oz() + theme(legend.position = "none", plot.title = element_text(hjust = 0.5)) +
labs(title = "DNA damage pathways", y = "ALS vs CTRL iPSMNs\nNormalised Enrichment Score", x = "") + geom_hline(yintercept = 0, linetype = 3) +
stat_pvalue_manual(gprofiler_database.DNA_damage.filt, label = "p.signif", y.position = 2, size = 4, xmin = "pathway", xmax = NULL, hide.ns = TRUE) + coord_flip()
gprofiler_database.DNA_damage_pathways_fgseaRes.barplot
ipsc_mn_als_datasets.polyA.pcaData <- plotPCA(ipsc_mn_als_datasets.polyA.vsd, intgroup=c("sample", "dataset", "database_dir", "condition", "mutation", "dataset_sample", "strandedness", "onset_site", "study_accession", "DIV", "instrument","library_type"), returnData=TRUE) %>% as_tibble() %>%
mutate(dataset = as.character(dataset), dataset = gsub("dafianca","dafinca",dataset), dataset_sample = gsub("dafianca","dafinca",dataset_sample), dataset = case_when(dataset == "neurolincs0" & grepl("^0007",sample) ~ "neurolincs_diMN", dataset == "neurolincsA" & grepl("^A-042",sample) ~ "neurolincs_iMN", dataset == "answerals" ~ "AnswerALS", dataset %in% c("dafinca.c9orf72","dafinca.tardbp") ~ "dafinca", TRUE ~ dataset),
DIV = as.character(DIV), DIV = case_when(is.na(DIV) ~ "30", TRUE ~ DIV), database_dir = gsub("/camp/lab/patanir/home/shared/projects/patani-collab/public-data","/camp/project/proj-luscombn-patani/working/public-data",database_dir),
library_type = case_when(dataset %in% c("catanese","dafinca","kapeli","kiskinis","luisier","mitchell","sareen","sommer","smith","sterneckert","wang") ~ "poly(A)", TRUE ~ "Total")) %>%
left_join(select(filter(ipsc_mn_als_datasets.metadata, library_type == "poly(A)"), dataset_sample, gender))
ipsc_mn_als_datasets.polyA.pca_dataset <- ggplot(ipsc_mn_als_datasets.polyA.pcaData, aes(PC1, PC2, color=dataset)) + geom_point(size=1) +
theme_oz() + theme(legend.text=element_text(size=8), legend.title=element_text(size=7), legend.position = "top", legend.spacing = unit(0.01, 'cm')) +
guides(color=guide_legend(keywidth=0.1, keyheight=0.1, default.unit="inch")) +
xlab(paste0("PC1: ",round(100 * attr(ipsc_mn_als_datasets.polyA.pcaData, "percentVar"))[1],"% variance")) + ylab(paste0("PC2: ",round(100 * attr(ipsc_mn_als_datasets.polyA.pcaData, "percentVar"))[2],"% variance")) + labs(color="polyA\ndatasets") +
ggforce::geom_mark_ellipse(aes(label = dataset), label.fontsize = 8, label.fontface = "plain", con.type = "straight", con.cap = 0.5, show.legend = FALSE)
ipsc_mn_als_datasets.polyA.pca_dataset
ipsc_mn_als_datasets.polyA.als.labels <- ipsc_mn_als_datasets.polyA.res %>% filter(padj < 0.05, gene_name %in% c(ALS_genes, RNA_BINDING_PROTEINS, p53.signalling.pathway, gprofiler_database$`DNA Damage Response`)) %>% distinct(gene_name) %>% pull(gene_name)
ipsc_mn_als_datasets.polyA.gene.labels <- ipsc_mn_als_datasets.polyA.res %>% filter(!grepl("^AC|^AL", gene_name)) %>% top_n(20, wt = -pvalue) %>% pull(gene_name)
ipsc_mn_als_datasets.polyA.volcano <- volcano_plot(ipsc_mn_als_datasets.polyA.res, numerator = "ALS", denomenator = "CTRL",
title = "polyA", subtitle = "43 Controls vs 48 ALS", ymax = 10, xmax = 3, dpi = 72, gene_labels = c(ipsc_mn_als_datasets.polyA.als.labels, ipsc_mn_als_datasets.polyA.gene.labels,"SESN1","TCEAL6"), distinct_labels = TRUE)
ipsc_mn_als_datasets.polyA.volcano
net_progeny <- get_progeny(organism = 'human', top = 100)
ipsc_mn_als_datasets.polyA.res.matrix <- ipsc_mn_als_datasets.polyA.res %>% select(gene_name, stat) %>% drop_na(stat) %>% distinct(gene_name, .keep_all = TRUE) %>% tibble_to_matrix(stat, row_names = "gene_name")
ipsc_mn_als_datasets.polyA_progeny.contrast_acts <- run_wmean(mat=ipsc_mn_als_datasets.polyA.res.matrix, net=net_progeny, .source='source', .target='target', .mor='weight', times = 1000, minsize = 5) %>% filter(statistic == 'norm_wmean') %>% mutate(p.signif = case_when(p_value < 0.003 ~ "****", p_value < 0.009 ~ "***",p_value < 0.01 ~ "**",p_value < 0.05 ~ "*", TRUE ~ ""), group1 = source, group2 = source)
ipsc_mn_als_datasets.polyA_progeny.pathwayActivity <- ggplot(ipsc_mn_als_datasets.polyA_progeny.contrast_acts, aes(x=reorder(source, score), y = score)) +
geom_bar(aes(fill = score), stat = "identity") +
scale_fill_gradient2(low = "darkblue", high = "indianred", mid = "whitesmoke", midpoint = 0) +
theme_oz() + theme(axis.text.x = element_text(angle = 90), legend.position = "none", plot.title = element_text(hjust = 0.5)) +
labs(title = "polyA: Signaling Pathways", x = "", y = "ALS vs CTRL enrichment") +
stat_pvalue_manual(ipsc_mn_als_datasets.polyA_progeny.contrast_acts, label = "p.signif", y.position = 12, xmin = "source", xmax = NULL, size = 6, hide.ns = TRUE)
ipsc_mn_als_datasets.polyA_progeny.pathwayActivity
net_dorothea <- get_dorothea(organism='human', levels=c('A', 'B', 'C'))
ipsc_mn_als_datasets.polyA_dorothea.contrast_acts <- run_wmean(mat=ipsc_mn_als_datasets.polyA.res.matrix, net=net_dorothea, .source='source', .target='target', .mor='mor', times = 1000, minsize = 5) %>% filter(statistic == 'norm_wmean') %>%
mutate(p.signif = case_when(p_value < 0.05 ~ "*", TRUE ~ ""), sig = case_when(p_value < 0.135 & abs(score) < 4 ~ "sig", p_value < 0.135 & abs(score) >= 4 ~ "sig_strong", p_value >= 0.135 ~ "non_sig"), direction = ifelse(score > 0, "up", "down"), class = paste(sig, direction))
ipsc_mn_als_datasets.polyA_dorothea.contrast_acts.labels.up = ipsc_mn_als_datasets.polyA_dorothea.contrast_acts %>% filter(p_value < 0.15) %>% top_n(score, n = 4) %>% pull(source)
ipsc_mn_als_datasets.polyA_dorothea.contrast_acts.labels.down = ipsc_mn_als_datasets.polyA_dorothea.contrast_acts %>% top_n(-score, n = 4) %>% pull(source)
# Volcano of TFs score vs p_value
ipsc_mn_als_datasets.polyA_dorothea.contrast_acts.volcano = ggplot(ipsc_mn_als_datasets.polyA_dorothea.contrast_acts, aes(x = score, y = -log10(p_value))) + geom_point(aes(colour = class)) +
scale_colour_manual(values = c("non_sig up" = "gray", "non_sig down" = "gray", "sig up" = "#F36F6F", "sig_strong up" = "#EB4445", "sig down" = "#A6CEE3", "sig_strong down" = "#79B1D3")) +
labs(y = expression(-log[10]~P~value), x = "Normalised enrichment ALS vs CTRL", title = "polyA: Transcription Factors") +
guides(colour = "none") +
theme_oz() + theme(plot.title = element_text(hjust = 0.5), plot.subtitle = element_text(hjust = 0.5), panel.border = element_blank(), axis.ticks = element_line(colour = "black") ) +
geom_text_repel(fontface = "italic", data = filter(ipsc_mn_als_datasets.polyA_dorothea.contrast_acts, source %in% c(ipsc_mn_als_datasets.polyA_dorothea.contrast_acts.labels.up, ipsc_mn_als_datasets.polyA_dorothea.contrast_acts.labels.down)), aes(label = source), max.overlaps = Inf, min.segment.length = unit(0, "lines"), size = 2.3) +
geom_vline(xintercept = 0, linetype = 'dotted') + ylim(c(0,2.8))
ipsc_mn_als_datasets.polyA_dorothea.contrast_acts.volcano
ipsc_mn_als_datasets.total.pcaData <- plotPCA(ipsc_mn_als_datasets.total.vsd, intgroup=c("sample", "dataset", "database_dir", "condition", "mutation", "dataset_sample", "strandedness", "onset_site", "study_accession", "DIV", "instrument","library_type"), returnData=TRUE) %>% as_tibble() %>%
mutate(dataset = as.character(dataset), dataset = gsub("dafianca","dafinca",dataset), dataset_sample = gsub("dafianca","dafinca",dataset_sample), dataset = case_when(dataset == "neurolincs0" & grepl("^0007",sample) ~ "neurolincs_diMN", dataset == "neurolincsA" & grepl("^A-042",sample) ~ "neurolincs_iMN", dataset == "answerals" ~ "AnswerALS", dataset %in% c("dafinca.c9orf72","dafinca.tardbp") ~ "dafinca", TRUE ~ dataset),
DIV = as.character(DIV), DIV = case_when(is.na(DIV) ~ "30", TRUE ~ DIV), database_dir = gsub("/camp/lab/patanir/home/shared/projects/patani-collab/public-data","/camp/project/proj-luscombn-patani/working/public-data",database_dir),
library_type = case_when(dataset %in% c("catanese","dafinca","kapeli","kiskinis","luisier","mitchell","sareen","sommer","smith","sterneckert","wang") ~ "poly(A)", TRUE ~ "Total")) %>%
left_join(select(filter(ipsc_mn_als_datasets.metadata, library_type == "Total"), dataset_sample, gender))
ipsc_mn_als_datasets.total.pca_dataset <- ggplot(ipsc_mn_als_datasets.total.pcaData, aes(PC1, PC2, color=dataset)) + geom_point(size=1) +
theme_oz() + theme(legend.text=element_text(size=8), legend.title=element_text(size=7), legend.position = "top", legend.spacing = unit(0.01, 'cm')) + guides(color=guide_legend(nrow=2, keywidth=0.1, keyheight=0.1, default.unit="inch")) +
xlab(paste0("PC1: ",round(100 * attr(ipsc_mn_als_datasets.total.pcaData, "percentVar"))[1],"% variance")) + ylab(paste0("PC2: ",round(100 * attr(ipsc_mn_als_datasets.total.pcaData, "percentVar"))[2],"% variance")) + labs(color="Total\ndatasets") +
ggforce::geom_mark_ellipse(aes(label = dataset), label.fontsize = 8, label.fontface = "plain", con.type = "straight", con.cap = 0.5, show.legend = FALSE)
ipsc_mn_als_datasets.total.pca_dataset
ipsc_mn_als_datasets.total.als.labels <- ipsc_mn_als_datasets.total.res %>% filter(padj < 0.05, gene_name %in% c(ALS_genes, RNA_BINDING_PROTEINS, p53.signalling.pathway, gprofiler_database$`DNA Damage Response`)) %>% distinct(gene_name) %>% pull(gene_name)
ipsc_mn_als_datasets.total.gene.labels <- ipsc_mn_als_datasets.total.res %>% filter(!grepl("^AC|^AL", gene_name)) %>% top_n(20, wt = -pvalue) %>% pull(gene_name)
ipsc_mn_als_datasets.total.volcano <- volcano_plot(ipsc_mn_als_datasets.total.res, numerator = "ALS", denomenator = "CTRL",
title = "Total", subtitle = "63 Controls vs 275 ALS", ymax = 8, xmax = 3, dpi = 72, gene_labels = c(ipsc_mn_als_datasets.total.als.labels, ipsc_mn_als_datasets.total.gene.labels,"RNASEL","SESN1", "TCEAL6","MTCO1P12","MTCO2P12","LMO4"))
ipsc_mn_als_datasets.total.volcano
ipsc_mn_als_datasets.total.res.matrix <- ipsc_mn_als_datasets.total.res %>% select(gene_name, stat) %>% drop_na(stat) %>% distinct(gene_name, .keep_all = TRUE) %>% tibble_to_matrix(stat, row_names = "gene_name")
ipsc_mn_als_datasets.total_progeny.contrast_acts <- run_wmean(mat=ipsc_mn_als_datasets.total.res.matrix, net=net_progeny, .source='source', .target='target', .mor='weight', times = 1000, minsize = 5) %>% filter(statistic == 'norm_wmean') %>% mutate(p.signif = case_when(p_value < 0.003 ~ "****", p_value < 0.009 ~ "***",p_value < 0.01 ~ "**",p_value < 0.05 ~ "*", TRUE ~ ""), group1 = source, group2 = source)
ipsc_mn_als_datasets.total_progeny.pathwayActivity <- ggplot(ipsc_mn_als_datasets.total_progeny.contrast_acts, aes(x=reorder(source, score), y = score)) +
geom_bar(aes(fill = score), stat = "identity") +
scale_fill_gradient2(low = "darkblue", high = "indianred", mid = "whitesmoke", midpoint = 0) +
theme_oz() + theme(axis.text.x = element_text(angle = 90), legend.position = "none", plot.title = element_text(hjust = 0.5)) +
labs(title = "Total: Signaling Pathways", x = "", y = "ALS vs CTRL enrichment") +
stat_pvalue_manual(ipsc_mn_als_datasets.total_progeny.contrast_acts, label = "p.signif", y.position = 8, xmin = "source", xmax = NULL, size = 6, hide.ns = TRUE)
ipsc_mn_als_datasets.total_progeny.pathwayActivity
ipsc_mn_als_datasets.total_dorothea.contrast_acts <- run_wmean(mat=ipsc_mn_als_datasets.total.res.matrix, net=net_dorothea, .source='source', .target='target', .mor='mor', times = 1000, minsize = 5) %>% filter(statistic == 'norm_wmean') %>%
mutate(p.signif = case_when(p_value < 0.05 ~ "*", TRUE ~ ""), sig = case_when(p_value < 0.135 & abs(score) < 4 ~ "sig", p_value < 0.135 & abs(score) >= 4 ~ "sig_strong", p_value >= 0.135 ~ "non_sig"), direction = ifelse(score > 0, "up", "down"), class = paste(sig, direction))
ipsc_mn_als_datasets.total_dorothea.contrast_acts.labels.up = ipsc_mn_als_datasets.total_dorothea.contrast_acts %>% top_n(score, n = 5) %>% pull(source)
ipsc_mn_als_datasets.total_dorothea.contrast_acts.labels.down = ipsc_mn_als_datasets.total_dorothea.contrast_acts %>% top_n(-score, n = 5) %>% pull(source)
# Volcano of TFs score vs p_value
ipsc_mn_als_datasets.total_dorothea.contrast_acts.volcano = ggplot(ipsc_mn_als_datasets.total_dorothea.contrast_acts, aes(x = score, y = -log10(p_value))) + geom_point(aes(colour = class)) +
scale_colour_manual(values = c("non_sig up" = "gray", "non_sig down" = "gray", "sig up" = "#F36F6F", "sig_strong up" = "#EB4445", "sig down" = "#A6CEE3", "sig_strong down" = "#79B1D3")) +
labs(y = expression(-log[10]~P~value), x = "Normalised enrichment ALS vs CTRL", title = "Total: Transcription Factors") +
guides(colour = "none") +
theme_oz() + theme(plot.title = element_text(hjust = 0.5), plot.subtitle = element_text(hjust = 0.5), panel.border = element_blank(), axis.ticks = element_line(colour = "black") ) +
# scale_y_continuous(expand = c(0,0), limits = c(0,ymax)) + scale_x_continuous(limits = c(-xmax,xmax))
geom_text_repel(fontface = "italic", data = filter(ipsc_mn_als_datasets.total_dorothea.contrast_acts, source %in% c(ipsc_mn_als_datasets.total_dorothea.contrast_acts.labels.up, ipsc_mn_als_datasets.total_dorothea.contrast_acts.labels.down, "TP53")), aes(label = source), max.overlaps = Inf, min.segment.length = unit(0, "lines"), size = 2.3) +
geom_vline(xintercept = 0, linetype = 'dotted') + ylim(c(0,2.8))
ipsc_mn_als_datasets.total_dorothea.contrast_acts.volcano
# # Scatter correlation
ipsc_mn_als_datasets.polyA_total.labels = select(ipsc_mn_als_datasets.total.res, gene_id, gene_name, polyA.stat = stat) %>% left_join(select(ipsc_mn_als_datasets.polyA.res, gene_id, gene_name, total.stat = stat)) %>% filter((polyA.stat > 2.5 & total.stat > 2.5) | (polyA.stat < -2.5 & total.stat < -2.5))
ipsc_mn_als_datasets.polyA_total.list = list("polyA: ALS vs CTRL" = ipsc_mn_als_datasets.polyA.res, "Total: ALS vs CTRL" = ipsc_mn_als_datasets.total.res)
ipsc_mn_als_datasets.polyA_total_stat.corr = plot_de_correlation(model_list1 = ipsc_mn_als_datasets.polyA_total.list, model_list2 = ipsc_mn_als_datasets.polyA_total.list, mutation1 = "polyA: ALS vs CTRL", mutation2 = "Total: ALS vs CTRL", gene_labels = pull(ipsc_mn_als_datasets.polyA_total.labels,gene_name), plot_corr_line = TRUE, plot_p_value = TRUE)
ipsc_mn_als_datasets.polyA_total_stat.corr
ipsc_mn_datasets_list.res = list("AnswerALS" = answerals$res, "neurolincs.diMN" = neurolincs.diMN$res, "neurolincs.iMN" = neurolincs.iMN$res, "catanese" = catanese$res, "dafinca C9orf72" = dafinca.c9orf72$res, "dafinca tardbp" = dafinca.tardbp$res, "luisier" = luisier$res, "wang" = wang$res, "kiskinis" = kiskinis$res, "sareen" = sareen$res, "sommer" = sommer$res, "sterneckert" = sterneckert$res, "kapeli" = kapeli$res, "desantis" = desantis$res, "smith" = smith$res, "bhinge" = bhinge$res, "hawkins" = hawkins$res)
ipsc_mn_datasets.res = bind_rows(ipsc_mn_datasets_list.res, .id = "dataset")
datasets_separated.volcano = volcano_plot(ipsc_mn_datasets.res, arrow_labels = FALSE, dpi = 300) + facet_wrap(~dataset, scales = "free", ncol = 6)
datasets_separated.volcano
# heatmap of correlation coefficients
ipsc_mn_datasets_stat = select(answerals$res, gene_id, answerals = stat) %>%
left_join(select(neurolincs.diMN$res, gene_id, neurolincs.diMN = stat)) %>%
left_join(select(neurolincs.iMN$res, gene_id, neurolincs.iMN = stat)) %>%
left_join(select(catanese$res, gene_id, catanese = stat)) %>%
left_join(select(dafinca.c9orf72$res, gene_id, dafinca.c9orf72 = stat)) %>%
left_join(select(dafinca.tardbp$res, gene_id, dafinca.tardbp = stat)) %>%
left_join(select(luisier$res, gene_id, luisier = stat)) %>%
left_join(select(wang$res, gene_id, wang = stat)) %>%
left_join(select(kiskinis$res, gene_id, kiskinis = stat)) %>%
left_join(select(sareen$res, gene_id, sareen = stat)) %>%
left_join(select(sommer$res, gene_id, sommer = stat)) %>%
left_join(select(sterneckert$res, gene_id, sterneckert = stat)) %>%
left_join(select(kapeli$res, gene_id, kapeli = stat)) %>%
left_join(select(desantis$res, gene_id, desantis = stat)) %>%
left_join(select(smith$res, gene_id, smith = stat)) %>%
left_join(select(bhinge$res, gene_id, bhinge = stat)) %>%
left_join(select(hawkins$res, gene_id, hawkins = stat)) %>%
drop_na() %>%
column_to_rownames("gene_id")
cormat <- round(cor(ipsc_mn_datasets_stat),2)
melted_cormat <- melt(cormat)
get_lower_tri<-function(cormat){ # Get lower triangle of the correlation matrix
cormat[upper.tri(cormat)] <- NA
return(cormat)
}
get_upper_tri <- function(cormat){ # Get upper triangle of the correlation matrix
cormat[lower.tri(cormat)]<- NA
return(cormat)
}
upper_tri <- get_upper_tri(cormat)
melted_cormat <- melt(upper_tri, na.rm = TRUE)
reorder_cormat <- function(cormat){ # Use correlation between variables as distance
dd <- as.dist((1-cormat)/2)
hc <- hclust(dd)
cormat <-cormat[hc$order, hc$order]
}
cormat <- reorder_cormat(cormat)
upper_tri <- get_upper_tri(cormat)
melted_cormat <- melt(upper_tri, na.rm = TRUE) # Melt the correlation matrix
dataset_dge_correlation_ggheatmap <- ggplot(melted_cormat, aes(Var2, Var1, fill = value))+
geom_tile(color = "white") + scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0, limit = c(-1,1), space = "Lab", name="Pearson\nCorrelation") +
geom_text(aes(Var2, Var1, label = value), color = "black", size = 3) +
theme_minimal() + theme(axis.title.x = element_blank(), axis.title.y = element_blank(), panel.grid.major = element_blank(), panel.border = element_blank(), panel.background = element_blank(), axis.ticks = element_blank(),
legend.position = c(0.25,0.75), legend.direction = "horizontal", legend.title = element_text(size = 8), axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1), axis.text=element_text(size=7.5)) + # coord_fixed()+
guides(fill = guide_colorbar(barwidth = 7, barheight = 1, title.position = "top", title.hjust = 0.5))
dataset_dge_correlation_ggheatmap
ipsc_mn_datasets_list = list("AnswerALS" = select(answerals$res,gene_name,stat), "neurolincs.diMN" = select(neurolincs.diMN$res,gene_name,stat), "neurolincs.iMN" = select(neurolincs.iMN$res,gene_name,stat), "catanese" = select(catanese$res,gene_name,stat), "dafinca C9orf72" = select(dafinca.c9orf72$res,gene_name,stat), "dafinca tardbp" = select(dafinca.tardbp$res,gene_name,stat), "luisier" = select(luisier$res,gene_name,stat), "wang" = select(wang$res,gene_name,stat), "kiskinis" = select(kiskinis$res,gene_name,stat), "sareen" = select(sareen$res,gene_name,stat), "sommer" = select(sommer$res,gene_name,stat), "sterneckert" = select(sterneckert$res,gene_name,stat), "kapeli" = select(kapeli$res,gene_name,stat), "desantis" = select(desantis$res,gene_name,stat), "smith" = select(smith$res,gene_name,stat), "bhinge" = select(bhinge$res,gene_name,stat), "hawkins" = select(hawkins$res,gene_name,stat))
datasets <- names(ipsc_mn_datasets_list)
ipsc_mn_datasets_list = map(ipsc_mn_datasets_list, ~{drop_na(.x)})
ipsc_mn_datasets_list = map(ipsc_mn_datasets_list, ~{distinct(.x, gene_name, .keep_all = TRUE)})
ipsc_mn_datasets_list = map(ipsc_mn_datasets_list, ~{tibble_to_matrix(.x, stat, row_names = "gene_name")})
# PROGENy
# net_progeny <- get_progeny(organism = 'human', top = 100)
ipsc_mn_datasets_list.progeny_scores = map_df(ipsc_mn_datasets_list, ~{run_wmean(.x, net=net_progeny, .source='source', .target='target', .mor='weight', times = 1000, minsize = 5)}, .id = "dataset") %>% filter(statistic == 'norm_wmean')
ipsc_mn_datasets_list.p53.progeny_scores = ipsc_mn_datasets_list.progeny_scores %>% filter(source == "p53") %>% mutate(p.signif = case_when(p_value < 0.003 ~ "****", p_value < 0.009 ~ "***",p_value < 0.01 ~ "**",p_value < 0.05 ~ "*", TRUE ~ ""), group1 = dataset, group2 = dataset)
ipsc_mn_datasets_list.p53.progeny_scores.plot <- ggplot(ipsc_mn_datasets_list.p53.progeny_scores, aes(x=reorder(dataset, score), y = score)) +
geom_bar(aes(fill = score), stat = "identity") +
scale_fill_gradient2(low = "darkblue", high = "indianred", mid = "whitesmoke", midpoint = 0) + #scale_fill_npg() +
theme_oz() + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1), plot.title = element_text(hjust = 0.5), legend.position = "none") +
geom_hline(yintercept = 0, linetype = 3) +
labs(title = "p53 pathway activity", x = "", y = "Normalised enrichment") + #
stat_pvalue_manual(ipsc_mn_datasets_list.p53.progeny_scores, label = "p.signif", y.position = 11.5, xmin = "dataset", xmax = NULL, size = 4, hide.ns = TRUE) + ylim(c(-12,12))
ipsc_mn_datasets_list.p53.progeny_scores.plot
# Dorothea
# net_dorothea <- get_dorothea(organism='human', levels=c('A', 'B', 'C'))
ipsc_mn_datasets_list.dorothea_scores = map_df(ipsc_mn_datasets_list, ~{run_wmean(.x, net=net_dorothea, .source='source', .target='target', .mor='mor', times = 1000, minsize = 5)}, .id = "dataset") %>% filter(statistic == 'norm_wmean')
ipsc_mn_datasets_list.TP53.dorothea_scores = ipsc_mn_datasets_list.dorothea_scores %>% filter(source == "TP53") %>% mutate(p.signif = case_when(p_value < 0.003 ~ "****", p_value < 0.009 ~ "***",p_value < 0.01 ~ "**",p_value < 0.05 ~ "*", TRUE ~ ""), group1 = dataset, group2 = dataset)
ipsc_mn_datasets_list.TP53.dorothea_scores.plot <- ggplot(ipsc_mn_datasets_list.TP53.dorothea_scores, aes(x=reorder(dataset, score), y = score)) +
geom_bar(aes(fill = score), stat = "identity") +
scale_fill_gradient2(low = "darkblue", high = "indianred", mid = "whitesmoke", midpoint = 0) + #scale_fill_npg() +
theme_oz() + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1), plot.title = element_text(hjust = 0.5), legend.position = "none") +
geom_hline(yintercept = 0, linetype = 3) +
labs(title = "TP53 Transcription Factor activity", x = "", y = "Normalised enrichment") +
stat_pvalue_manual(ipsc_mn_datasets_list.TP53.dorothea_scores, label = "p.signif", y.position = 13, xmin = "dataset", xmax = NULL, size = 4, hide.ns = TRUE) + ylim(c(-13,13))
ipsc_mn_datasets_list.TP53.dorothea_scores.plot
answerals.proteomics.als_vs_ctrl.res = read_csv(here(shared_path,"/answerals/proteomics/answerals_proteomics_als_vs_ctrl.csv"))
answerals.proteomics.als_vs_ctrl.volcano = volcano_plot(answerals.proteomics.als_vs_ctrl.res, title = "Protein Expression", subtitle = "171 ALS vs 33 CTRL",
gene_labels = c(pull(top_n(answerals.proteomics.als_vs_ctrl.res,n=10,wt=stat),gene_name), pull(filter(answerals.proteomics.als_vs_ctrl.res, pvalue < 0.05, gene_name %in% c(ALS_genes,p53.signalling.pathway)),gene_name), "TARDBP","FUS","SFPQ","SOD1", "BRD4", "EEF2"),
ymax = 4,xmax = 2,numerator = "ALS",denomenator = "CTRL", significance_threshold = "pvalue", dpi = 300)
answerals.proteomics.als_vs_ctrl.volcano
# GO terms
answerals.proteomics.als_vs_ctrl.gost <- answerals.proteomics.als_vs_ctrl.res %>% filter(pvalue < 0.05) %>% group_split(log2FoldChange > 0) %>% map("gene_id") %>% gost()
answerals.proteomics.als_vs_ctrl.gprofiles <- answerals.proteomics.als_vs_ctrl.gost$result %>% filter(!str_detect(source, "TF|CORUM|HPA")) %>% arrange( -log10(p_value) ) %>% mutate(query = case_when(query == "query_1" ~ "ALS\nDown", query == "query_2" ~ "ALS\nUp"), term_name = as.factor(term_name))
answerals.proteomics.als_vs_ctrl.gprofiles_down <- answerals.proteomics.als_vs_ctrl.gprofiles %>% filter(query == "ALS\nDown")
answerals.proteomics.als_vs_ctrl.gprofiles_up <- answerals.proteomics.als_vs_ctrl.gprofiles %>% filter(query == "ALS\nUp")
down_list <- c(
"Schaffer collateral - CA1 synapse",
"Golgi transport complex")
answerals.proteomics.als_vs_ctrl.gprofiles_filt_down <- answerals.proteomics.als_vs_ctrl.gprofiles_down %>% filter(query == "ALS\nDown") %>% filter(term_name %in% down_list)
up_list <- c(
"proteasome complex",
"Axon guidance",
"nucleocytoplasmic transport",
"Cholesterol biosynthesis",
"Amyotrophic lateral sclerosis",
"protein modification process",
"Metabolism of proteins",
"Cellular responses to stress",
"protein localization",
"endoplasmic reticulum",
"protein binding",
"Metabolism of RNA",
"translation",
"protein metabolic process")
answerals.proteomics.als_vs_ctrl.gprofiles_filt_up <- answerals.proteomics.als_vs_ctrl.gprofiles_up %>% filter(query == "ALS\nUp") %>% filter(term_name %in% up_list)
answerals.proteomics.als_vs_ctrl.gprofiles_filt_combined <- bind_rows(answerals.proteomics.als_vs_ctrl.gprofiles_filt_down, answerals.proteomics.als_vs_ctrl.gprofiles_filt_up) %>% mutate(query = factor(query, levels = c("ALS\nUp", "ALS\nDown")))
answerals.proteomics.als_vs_ctrl.go_curated <- curated_profiles(gprofiles = answerals.proteomics.als_vs_ctrl.gprofiles_filt_combined,label_angle = 0) + theme(legend.position = "none", axis.text = element_text(size=10))
answerals.proteomics.als_vs_ctrl.go_curated
## GSEA: DNA damage response, p53 signalling pathway
answerals.proteomics.als_vs_ctrl.geneList <- answerals.proteomics.als_vs_ctrl.res %>% drop_na(stat) %>% pull(stat)
names(answerals.proteomics.als_vs_ctrl.geneList) <- answerals.proteomics.als_vs_ctrl.res %>% drop_na(stat) %>% pull(gene_name) %>% as.character()
answerals.proteomics.als_vs_ctrl.geneList = sort(answerals.proteomics.als_vs_ctrl.geneList, decreasing = TRUE)
# p53 signalling pathway
p53.signalling_genes.list <- list("p53.signalling" = answerals.proteomics.als_vs_ctrl.res$gene_name[answerals.proteomics.als_vs_ctrl.res$gene_name %in% p53.signalling.pathway])
answerals.proteomics.als_vs_ctrl.p53.signalling_fgseaRes <- fgsea(pathways = p53.signalling_genes.list, stats = answerals.proteomics.als_vs_ctrl.geneList)
answerals.proteomics.als_vs_ctrl.p53.signalling_fgseaRes
## pathway pval padj log2err ES NES size
## 1: p53.signalling 0.4594017 0.4594017 0.0525899 0.2633203 1.00739 103
## leadingEdge
## 1: RBBP7,CSNK2B,PRMT1,CNOT9,RPL23,RRM2,...
# pathway pval padj log2err ES NES size leadingEdge
# 1: p53.signalling 0.5122995 0.5122995 0.04763873 0.2633203 0.9831828 103 RBBP7,CSNK2B,PRMT1,CNOT9,RPL23,RRM2,...
answerals.proteomics.als_vs_ctrl.p53.signalling.gseaplot = plotEnrichment(p53.signalling_genes.list[["p53.signalling"]], answerals.proteomics.als_vs_ctrl.geneList) +
theme(axis.text=element_text(size=8), axis.title=element_text(size=10)) + ggtitle("p53 signalling") + xlab("") +
annotate("text", x = 3000, y = 0.2, label = paste0("NES = ", round(answerals.proteomics.als_vs_ctrl.p53.signalling_fgseaRes$NES, digits = 3),"\nP = ", round(answerals.proteomics.als_vs_ctrl.p53.signalling_fgseaRes$pval, digits = 6)), size = 2.8, hjust = 0) + theme_oz()
answerals.proteomics.als_vs_ctrl.p53.signalling.gseaplot
# Compare RNAseq to mass spec
ipsc_mn_rnaseq_mass_spec.join = select(answerals.proteomics.als_vs_ctrl.res, gene_name, gene_id, MS.lfc = log2FoldChange, MS.pvalue = pvalue, MS.padj = padj, MS.stat = stat) %>%
left_join(select(ipsc_mn_als_datasets.res, gene_name, gene_id, RNA.lfc = log2FoldChange, RNA.pvalue = pvalue, RNA.padj = padj, RNA.stat = stat))
# Correlate RNAseq & Mass Spec
ipsc_mn_rnaseq_mass_spec = list("RNA-seq: ALS vs CTRL" = ipsc_mn_als_datasets.res, "Mass Spec: ALS vs CTRL" = answerals.proteomics.als_vs_ctrl.res)
ipsc_mn_rnaseq_mass_spec.scatter = plot_de_correlation(model_list1 = ipsc_mn_rnaseq_mass_spec, model_list2 = ipsc_mn_rnaseq_mass_spec, mutation1 = "RNA-seq: ALS vs CTRL", mutation2 = "Mass Spec: ALS vs CTRL", col = "log2FoldChange", gene_labels = c("MACROD1","EEFSEC","EIF1AY","H3C4","CEMIP","PTGR1","HOXC8","KRT1","IGFBP5","TPX2", "TARDBP","FUS","SFPQ","SOD1"), plot_p_value = TRUE) & xlim(-0.9,0.6) & ylim(-1.8,1.9)
ipsc_mn_rnaseq_mass_spec.scatter