Fig 2 iPSN ALS meta-analysis

Volcano

ipsc_mn_als_datasets.als.labels <- ipsc_mn_als_datasets.res %>% filter(padj < 0.05, gene_name %in% c(ALS_genes, RNA_BINDING_PROTEINS, p53.signalling.pathway, gprofiler_database$`DNA Damage Response`)) %>% distinct(gene_name) %>% pull(gene_name)
ipsc_mn_als_datasets.gene.labels <- ipsc_mn_als_datasets.res %>% filter(!grepl("^AC|^AL", gene_name)) %>% top_n(20, wt = -pvalue) %>% pull(gene_name)
ipsc_mn_als_datasets.volcano <- volcano_plot(ipsc_mn_als_datasets.res, numerator = "ALS", denomenator = "CTRL",
               subtitle = "106 Controls vs 323 ALS", ymax = 8, xmax = 2, dpi = 72, gene_labels = c(ipsc_mn_als_datasets.als.labels, ipsc_mn_als_datasets.gene.labels))
ipsc_mn_als_datasets.volcano

Functional enrichment terms

ipsc_mn_als_datasets.gost <- ipsc_mn_als_datasets.res %>% filter(padj < 0.05) %>% group_split(log2FoldChange > 0) %>%  map("gene_id") %>% gost()
ipsc_mn_als_datasets.gprofiles <- ipsc_mn_als_datasets.gost$result %>% filter(!str_detect(source, "TF|CORUM")) %>% arrange( -log10(p_value) ) %>% mutate(query = case_when(query == "query_1" ~ " ALS Down ", query == "query_2" ~ " ALS Up "), term_name = as.factor(term_name))
ipsc_mn_als_datasets.gprofiles_down <- ipsc_mn_als_datasets.gprofiles %>% filter(query == " ALS Down ")
ipsc_mn_als_datasets.gprofiles_up <- ipsc_mn_als_datasets.gprofiles %>% filter(query == " ALS Up ")
# paste('"',unique(ipsc_mn_als_datasets.gprofiles_down$term_name),'",', sep = "", collapse = '\n') %>% cat()
down_list <- c(
"neuron fate commitment",
"cell differentiation in spinal cord",
"DNA-binding transcription factor activity, RNA polymerase II-specific",
"ventral spinal cord development",
"cis-regulatory region sequence-specific DNA binding"
)
ipsc_mn_als_datasets.gprofiles_filt_down <- ipsc_mn_als_datasets.gprofiles %>% filter(query == " ALS Down ")  %>% filter(term_name %in% down_list)
# paste('"',unique(ipsc_mn_als_datasets.gprofiles_up$term_name),'",', sep = "", collapse = '\n') %>% cat()
up_list <- c(
"p53 transcriptional gene network",
"Genotoxicity pathway",
"miRNA regulation of DNA damage response",
"DNA damage response",
"p53 signaling pathway")
ipsc_mn_als_datasets.gprofiles_filt_up <- ipsc_mn_als_datasets.gprofiles %>% filter(query == " ALS Up ")  %>% filter(term_name %in% up_list)
ipsc_mn_als_datasets.gprofiles_filt_combined <- bind_rows(ipsc_mn_als_datasets.gprofiles_filt_down, ipsc_mn_als_datasets.gprofiles_filt_up) %>% mutate(query = factor(query, levels = c(" ALS Up ", " ALS Down ")))
ipsc_mn_als_datasets.gprofiles_filt_combined = ipsc_mn_als_datasets.gprofiles_filt_combined %>% mutate(term_name = gsub("miRNA regulation of DNA damage response", "miRNA regulation of DNA damage", term_name), term_name = gsub("and","&",term_name), term_name = gsub("DNA-binding transcription factor activity, RNA polymerase II-specific","transcription factor activity",term_name), term_name = gsub("cis-regulatory region sequence-specific DNA binding","DNA binding",term_name), term_name = gsub("cell ","",term_name))
ipsc_mn_als_datasets.go_curated <- curated_profiles(gprofiles = ipsc_mn_als_datasets.gprofiles_filt_combined)
ipsc_mn_als_datasets.go_curated

GSEA of p53 signalling pathway

ipsc_mn_als_datasets.geneList <- ipsc_mn_als_datasets.res %>% drop_na(stat) %>% pull(stat)
names(ipsc_mn_als_datasets.geneList) <- ipsc_mn_als_datasets.res %>% drop_na(stat) %>% pull(gene_name) %>% as.character()
ipsc_mn_als_datasets.geneList = sort(ipsc_mn_als_datasets.geneList, decreasing = TRUE)

# p53 signalling pathway
p53.signalling_genes.list <- list("p53.signalling" = ipsc_mn_als_datasets.res$gene_name[ipsc_mn_als_datasets.res$gene_name %in% go.pathways.msigdb$GO_SIGNAL_TRANSDUCTION_BY_P53_CLASS_MEDIATOR])
ipsc_mn_als_datasets.p53.signalling_fgseaRes <- fgsea(pathways = p53.signalling_genes.list, stats = ipsc_mn_als_datasets.geneList)
ipsc_mn_als_datasets.p53.signalling.gseaplot = plotEnrichment(p53.signalling_genes.list[["p53.signalling"]], ipsc_mn_als_datasets.geneList) + 
  theme(axis.text=element_text(size=8), axis.title=element_text(size=10)) + ggtitle("p53 signalling") + xlab("") +
  annotate("text", x = 35000, y = 0.3, label = paste0("NES = ", round(ipsc_mn_als_datasets.p53.signalling_fgseaRes$NES, digits = 3),"\nP = ", round(ipsc_mn_als_datasets.p53.signalling_fgseaRes$pval, digits = 6)), size = 2.8, hjust = 0) + theme_oz()
ipsc_mn_als_datasets.p53.signalling.gseaplot

PROGENy pathways

https://www.bioconductor.org/packages/release/bioc/vignettes/decoupleR/inst/doc/pw_bk.html https://github.com/saezlab/transcriptutorial/blob/master/scripts/03_Pathway_activity_with_Progeny.md https://saezlab.github.io/progeny/

Pathway RespOnsive GENes (PROGENy) estimates signaling pathway activity

net_progeny <- get_progeny(organism = 'human', top = 100)
ipsc_mn_als_datasets.res.matrix <- ipsc_mn_als_datasets.res %>% select(gene_name, stat) %>%  drop_na(stat) %>% distinct(gene_name, .keep_all = TRUE) %>% tibble_to_matrix(stat, row_names = "gene_name")
ipsc_mn_als_progeny.contrast_acts <- run_wmean(mat=ipsc_mn_als_datasets.res.matrix, net=net_progeny, .source='source', .target='target', .mor='weight', times = 1000, minsize = 5) %>% filter(statistic == 'norm_wmean') %>% mutate(p.signif = case_when(p_value < 0.003 ~ "***", p_value < 0.009 ~ "***",p_value < 0.01 ~ "**",p_value < 0.05 ~ "*", TRUE ~ ""), group1 = source, group2 = source)
ipsc_mn_als_progeny.pathwayActivity <- ggplot(ipsc_mn_als_progeny.contrast_acts, aes(x=reorder(source, score), y = score)) +
    geom_bar(aes(fill = score), stat = "identity") +
    scale_fill_gradient2(low = "darkblue", high = "indianred", mid = "whitesmoke", midpoint = 0) + 
    theme_oz() + theme(axis.text.x = element_text(angle = 90), legend.position = "none", plot.title = element_text(hjust = 0.5)) +
    labs(title = "Signaling Pathways", x = "", y = "ALS vs CTRL Normalised enrichment") +
    stat_pvalue_manual(ipsc_mn_als_progeny.contrast_acts, label = "p.signif", y.position = 14, xmin = "source", xmax = NULL, size = 3, hide.ns = TRUE) 
ipsc_mn_als_progeny.pathwayActivity

p53 weights

ipsc_mn_als_progeny.p53weights = net_progeny %>% filter(source == "p53", target %in% ipsc_mn_als_datasets.res$gene_name) %>% left_join(select(ipsc_mn_als_datasets.res, target=gene_name,stat)) %>% 
  mutate(color = case_when(weight > 0 & stat > 0 ~ '1', weight > 0 & stat < 0 ~ '2', weight < 0 & stat > 0 ~ '2', weight < 0 & stat < 0 ~ '1', TRUE ~ "3"))
ipsc_mn_als_progeny.p53weights.plot = ggplot(ipsc_mn_als_progeny.p53weights, aes(x = weight, y = stat, color = color)) + geom_point(size = 0.5) +
  scale_colour_manual(values = c("red","royalblue3","grey")) +
  geom_text_repel(fontface = "italic", data = filter(ipsc_mn_als_progeny.p53weights, weight > 5, stat > 3), aes(label = target), max.overlaps = Inf, min.segment.length = unit(0, "lines"), size = 2.1) +
  theme_oz() + theme(legend.position = "none", plot.title = element_text(hjust = 0.5)) +
  labs(title = "p53 pathway weights", x = "p53 pathway gene weight", y = "ALS vs CTRL test statistic") +
  geom_vline(xintercept = 0, linetype = 'dotted') +  geom_hline(yintercept = 0, linetype = 'dotted') +
  coord_cartesian(xlim = c(-8,18), ylim = c(-2.5,5)) 
ipsc_mn_als_progeny.p53weights.plot

DoRoTHEA transcription factors

https://www.bioconductor.org/packages/release/bioc/vignettes/decoupleR/inst/doc/tf_bk.html https://github.com/saezlab/transcriptutorial/blob/master/scripts/04_TranscriptionFactor_activity_with_Dorothea.md

net_dorothea <- get_dorothea(organism='human', levels=c('A', 'B', 'C'))
ipsc_mn_als_dorothea.contrast_acts <- run_wmean(mat=ipsc_mn_als_datasets.res.matrix, net=net_dorothea, .source='source', .target='target', .mor='mor', times = 1000, minsize = 5) %>% filter(statistic == 'norm_wmean') %>% 
  mutate(p.signif = case_when(p_value < 0.05 ~ "*", TRUE ~ ""), sig = case_when(p_value < 0.135 & abs(score) < 4 ~ "sig", p_value < 0.135 & abs(score) >= 4 ~ "sig_strong", p_value >= 0.135 ~ "non_sig"), direction = ifelse(score > 0, "up", "down"), class = paste(sig, direction))

ipsc_mn_als_dorothea.contrast_acts.labels.up = ipsc_mn_als_dorothea.contrast_acts %>% top_n(score, n = 3) %>% pull(source)
ipsc_mn_als_dorothea.contrast_acts.labels.down = ipsc_mn_als_dorothea.contrast_acts %>% top_n(-score, n = 3) %>% pull(source)

# Volcano of TFs score vs p_value
ipsc_mn_als_dorothea.contrast_acts.volcano = ggplot(ipsc_mn_als_dorothea.contrast_acts, aes(x = score, y = -log10(p_value))) + geom_point(aes(colour = class)) +
    scale_colour_manual(values = c("non_sig up" = "gray", "non_sig down" = "gray",  "sig up" = "#F36F6F", "sig_strong up" = "#EB4445", "sig down" = "#A6CEE3", "sig_strong down" = "#79B1D3")) +
  labs(y = expression(-log[10]~P~value), x = "Normalised enrichment ALS vs CTRL", title = "Transcription Factors") +
    guides(colour = "none") +
    theme_oz() +  theme(plot.title = element_text(hjust = 0.5), plot.subtitle = element_text(hjust = 0.5), panel.border = element_blank(), axis.ticks = element_line(colour = "black") ) +
  geom_text_repel(fontface = "italic", data = filter(ipsc_mn_als_dorothea.contrast_acts, source %in% c(ipsc_mn_als_dorothea.contrast_acts.labels.up, ipsc_mn_als_dorothea.contrast_acts.labels.down)), aes(label = source), max.overlaps = Inf, min.segment.length = unit(0, "lines"), size = 2.5) +  
  geom_vline(xintercept = 0, linetype = 'dotted') + ylim(c(0,2.8))
ipsc_mn_als_dorothea.contrast_acts.volcano

TP53 target genes

ipsc_mn_als_dorothea.tp53 <- net_dorothea %>%  filter(source == "TP53") %>% mutate(gene_name = target) %>% left_join(select(ipsc_mn_als_datasets.res, gene_name,stat,log2FoldChange,pvalue)) %>%
  mutate(color = case_when(mor > 0 & stat > 0 ~ '1', mor > 0 & stat < 0~'2',mor < 0 & stat > 0~'2',mor < 0 & stat < 0~'1',TRUE ~ "3"))

ipsc_mn_als_dorothea.tp53.volcano = ggplot(ipsc_mn_als_dorothea.tp53, aes(x = log2FoldChange, y = -log10(pvalue), color = color, size=abs(mor))) + geom_point() + scale_size(range = c(0, 3)) +
  scale_colour_manual(values = c("red","royalblue3","grey")) +
  geom_text_repel(data = filter(ipsc_mn_als_dorothea.tp53, abs(stat) > 3), aes(label = gene_name, size=1), fontface = "italic", max.overlaps = Inf, size = 2.3) +
  theme_oz() + theme(legend.position = "none", plot.title = element_text(hjust = 0.5)) +  geom_vline(xintercept = 0, linetype = 'dotted') + geom_hline(yintercept = 0, linetype = 'dotted') +
    labs(title = "TP53 regulon", x = "log2 fold change (ALS vs CTRL)", y = expression(-log[10]~P~value)) + ylim(c(0,2)) +   
  geom_text_repel(fontface = "italic", data = filter(ipsc_mn_als_dorothea.tp53, log2FoldChange > 0.25, -log10(pvalue) > 1.0), aes(label = target), max.overlaps = Inf, min.segment.length = unit(0, "lines"), size = 2.5)
ipsc_mn_als_dorothea.tp53.volcano

Table S3 DGE panALS

ipsc_mn_als_datasets.res.sig = ipsc_mn_als_datasets.res %>% filter(padj < 0.05) %>% select(gene_name, everything())
kbl(ipsc_mn_als_datasets.res.sig) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), fixed_thead = T) %>% scroll_box(width = "100%", height = "500px")
gene_name gene_id baseMean log2FoldChange lfcSE stat pvalue padj
MTCO1P12 ENSG00000237973 572.701897 -1.6010349 0.1962405 -8.158533 0.00e+00 0.0000000
ADGRG7 ENSG00000144820 8.018303 3.3092857 0.5420888 6.104693 0.00e+00 0.0000141
RNASEL ENSG00000135828 149.633190 0.5962462 0.1171485 5.089660 4.00e-07 0.0032664
CHRND ENSG00000135902 6.225069 -0.6088888 0.1269986 -4.794454 1.60e-06 0.0077084
TBX5 ENSG00000089225 3.673370 -3.1032906 0.6499186 -4.774891 1.80e-06 0.0077084
ZNF676 ENSG00000196109 3.733748 3.3438001 0.7011752 4.768851 1.90e-06 0.0077084
AC025171.1 ENSG00000177738 46.076146 -0.3937060 0.0827819 -4.755944 2.00e-06 0.0077084
TMSB15B ENSG00000158427 653.227425 -0.2384203 0.0510804 -4.667548 3.00e-06 0.0086407
PTCHD4 ENSG00000244694 834.105420 0.3292627 0.0706018 4.663660 3.10e-06 0.0086407
NDRG2 ENSG00000165795 581.833220 -0.1970758 0.0422915 -4.659941 3.20e-06 0.0086407
MTCO2P12 ENSG00000229344 7.853073 -1.4451766 0.3127088 -4.621477 3.80e-06 0.0092457
MYOG ENSG00000122180 4.184301 -0.9060086 0.1967146 -4.605702 4.10e-06 0.0092457
TCEAL6 ENSG00000204071 57.699706 0.6145108 0.1338354 4.591542 4.40e-06 0.0092457
TNFRSF10B ENSG00000120889 5046.827884 0.2082392 0.0461122 4.515926 6.30e-06 0.0116535
THSD1 ENSG00000136114 99.370506 0.2534239 0.0561581 4.512689 6.40e-06 0.0116535
AC120036.4 ENSG00000255366 162.220351 0.4677634 0.1040612 4.495078 7.00e-06 0.0116535
POU5F1 ENSG00000204531 62.072375 -1.8657621 0.4158933 -4.486155 7.30e-06 0.0116535
ISCU ENSG00000136003 1487.773856 0.1030001 0.0230432 4.469863 7.80e-06 0.0118787
TP53TG3E ENSG00000275034 2.542112 7.1535266 1.6299732 4.388739 1.14e-05 0.0163788
SESN1 ENSG00000080546 1249.025842 0.1711968 0.0391061 4.377748 1.20e-05 0.0163788
LMO4 ENSG00000143013 1893.406368 -0.1959024 0.0450082 -4.352595 1.35e-05 0.0167379
RRM2B ENSG00000048392 1858.481757 0.2020891 0.0464341 4.352172 1.35e-05 0.0167379
SOD2-OT1 ENSG00000285427 40.135367 -0.2587170 0.0596421 -4.337828 1.44e-05 0.0170913
DACH2 ENSG00000126733 214.728551 -0.4718518 0.1098737 -4.294493 1.75e-05 0.0198326
CDKN1A ENSG00000124762 2557.463659 0.3542375 0.0827034 4.283226 1.84e-05 0.0198326
PCDH12 ENSG00000113555 14.374543 0.7988895 0.1867528 4.277790 1.89e-05 0.0198326
FOXN4 ENSG00000139445 41.735793 -0.8249476 0.1938962 -4.254583 2.09e-05 0.0211906
CROCC2 ENSG00000226321 8.794508 -1.5582071 0.3704684 -4.206046 2.60e-05 0.0253547
PDE8B ENSG00000113231 185.056712 -0.4275982 0.1025237 -4.170726 3.04e-05 0.0286020
OLIG2 ENSG00000205927 193.846024 -0.7014318 0.1688481 -4.154218 3.26e-05 0.0297222
AC026367.3 ENSG00000275759 9.932144 0.4831218 0.1172603 4.120079 3.79e-05 0.0328980
MIR34AHG ENSG00000228526 339.051491 0.3735483 0.0908080 4.113608 3.90e-05 0.0328980
PLIN5 ENSG00000214456 30.874450 -0.4650748 0.1132377 -4.107068 4.01e-05 0.0328980
OLIG1 ENSG00000184221 98.491406 -0.7260811 0.1770438 -4.101139 4.11e-05 0.0328980
RPL23AP2 ENSG00000225067 4.347676 1.4318105 0.3496167 4.095372 4.21e-05 0.0328980
AACSP1 ENSG00000250420 62.083860 -1.0099581 0.2490903 -4.054587 5.02e-05 0.0371376
ABL2 ENSG00000143322 2631.767326 0.1245984 0.0307329 4.054229 5.03e-05 0.0371376
AL450405.1 ENSG00000230202 58.424878 0.8107089 0.2005597 4.042233 5.29e-05 0.0380616
PAQR4 ENSG00000162073 878.096823 -0.1423603 0.0353904 -4.022565 5.76e-05 0.0397368
RPL29P11 ENSG00000224858 9.135389 -0.9411368 0.2341103 -4.020057 5.82e-05 0.0397368
TNNC2 ENSG00000101470 6.241813 -0.5157090 0.1286956 -4.007200 6.14e-05 0.0402053
OPCML-IT1 ENSG00000254896 21.400539 -0.4982370 0.1243796 -4.005778 6.18e-05 0.0402053
FBXO22 ENSG00000167196 2179.343449 0.1144354 0.0288417 3.967703 7.26e-05 0.0461030

Table S4 Dorothea panALS

ipsc_mn_als_dorothea.contrast_acts <- run_wmean(mat=ipsc_mn_als_datasets.res.matrix, net=net_dorothea, .source='source', .target='target', .mor='mor', times = 100, minsize = 5) %>% filter(statistic == 'norm_wmean') %>% select(-statistic, -condition, transcription_factor = source, normalised_enrichment_score = score) %>% arrange(p_value, -abs(normalised_enrichment_score)) %>% top_n(n = 20, wt = abs(normalised_enrichment_score))
kbl(ipsc_mn_als_dorothea.contrast_acts) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), fixed_thead = T) %>% scroll_box(width = "100%", height = "500px")
transcription_factor normalised_enrichment_score p_value
TP53 7.888387 0.02
PRDM14 -7.249614 0.02
ZNF274 6.598009 0.02
ZBTB33 6.299956 0.02
ATF4 6.233782 0.02
SOX2 -5.917799 0.02
TFAP2C -5.878825 0.02
ZNF263 -5.696282 0.02
ONECUT1 -5.518476 0.02
SIX5 -5.254948 0.02
ZBTB11 5.042477 0.02
CTCF -4.874786 0.02
ZNF639 4.751471 0.02
SNAPC4 -4.650668 0.02
GLI2 -4.376095 0.02
SIX2 -4.344603 0.02
HMBOX1 4.132908 0.02
ESR1 -4.132246 0.02
IRF3 -3.943642 0.02
STAT1 3.941067 0.10

Fig S8 GSEA DNA damage

ipsc_mn_als_datasets.geneList <- ipsc_mn_als_datasets.res %>% drop_na(stat) %>% pull(stat)
names(ipsc_mn_als_datasets.geneList) <- ipsc_mn_als_datasets.res %>% drop_na(stat) %>% pull(gene_name) %>% as.character()
ipsc_mn_als_datasets.geneList = sort(ipsc_mn_als_datasets.geneList, decreasing = TRUE)

gprofiler_database.DNA_damage_pathways = gprofiler_database[grepl('DNA damage|p53', names(gprofiler_database))]
gprofiler_database.DNA_damage_pathways_fgseaRes <- fgsea(pathways = gprofiler_database.DNA_damage_pathways, stats = ipsc_mn_als_datasets.geneList)
gprofiler_database.DNA_damage.filt = filter(gprofiler_database.DNA_damage_pathways_fgseaRes, size >7, !grepl("uncertain|negative|prostate",pathway)) %>% mutate(p.signif = case_when(pval < 0.003 ~ "***", pval < 0.009 ~ "***",pval < 0.01 ~ "**",pval < 0.05 ~ "*", TRUE ~ ""), group1 = "pathway", group2 = "pathway")
gprofiler_database.DNA_damage_pathways_fgseaRes.barplot <- ggplot(gprofiler_database.DNA_damage.filt, aes(x=reorder(pathway, NES), y = NES)) +
    geom_bar(aes(fill = NES), stat = "identity") +
    scale_fill_gradient2(low = "darkblue", high = "indianred", mid = "whitesmoke", midpoint = 0) + 
    theme_oz() + theme(legend.position = "none", plot.title = element_text(hjust = 0.5)) +
    labs(title = "DNA damage pathways", y = "ALS vs CTRL iPSMNs\nNormalised Enrichment Score", x = "") + geom_hline(yintercept = 0, linetype = 3) + 
    stat_pvalue_manual(gprofiler_database.DNA_damage.filt, label = "p.signif", y.position = 2, size = 4, xmin = "pathway", xmax = NULL, hide.ns = TRUE) + coord_flip()
gprofiler_database.DNA_damage_pathways_fgseaRes.barplot

Fig S9 polyA & total subgroup analyses

polyA

PCA

ipsc_mn_als_datasets.polyA.pcaData <- plotPCA(ipsc_mn_als_datasets.polyA.vsd, intgroup=c("sample", "dataset", "database_dir", "condition", "mutation", "dataset_sample", "strandedness", "onset_site", "study_accession", "DIV", "instrument","library_type"), returnData=TRUE) %>% as_tibble() %>%
  mutate(dataset = as.character(dataset), dataset = gsub("dafianca","dafinca",dataset), dataset_sample = gsub("dafianca","dafinca",dataset_sample), dataset = case_when(dataset == "neurolincs0" & grepl("^0007",sample) ~ "neurolincs_diMN", dataset == "neurolincsA" & grepl("^A-042",sample) ~ "neurolincs_iMN", dataset == "answerals" ~ "AnswerALS",  dataset %in% c("dafinca.c9orf72","dafinca.tardbp") ~ "dafinca", TRUE ~  dataset), 
         DIV = as.character(DIV), DIV = case_when(is.na(DIV) ~ "30", TRUE ~  DIV), database_dir = gsub("/camp/lab/patanir/home/shared/projects/patani-collab/public-data","/camp/project/proj-luscombn-patani/working/public-data",database_dir),
         library_type = case_when(dataset %in% c("catanese","dafinca","kapeli","kiskinis","luisier","mitchell","sareen","sommer","smith","sterneckert","wang") ~ "poly(A)", TRUE ~ "Total")) %>%
  left_join(select(filter(ipsc_mn_als_datasets.metadata, library_type == "poly(A)"), dataset_sample, gender)) 
  
ipsc_mn_als_datasets.polyA.pca_dataset <- ggplot(ipsc_mn_als_datasets.polyA.pcaData, aes(PC1, PC2, color=dataset)) + geom_point(size=1) + 
  theme_oz() + theme(legend.text=element_text(size=8), legend.title=element_text(size=7), legend.position = "top", legend.spacing = unit(0.01, 'cm')) + 
  guides(color=guide_legend(keywidth=0.1, keyheight=0.1, default.unit="inch")) +
  xlab(paste0("PC1: ",round(100 * attr(ipsc_mn_als_datasets.polyA.pcaData, "percentVar"))[1],"% variance")) + ylab(paste0("PC2: ",round(100 * attr(ipsc_mn_als_datasets.polyA.pcaData, "percentVar"))[2],"% variance")) + labs(color="polyA\ndatasets") + 
  ggforce::geom_mark_ellipse(aes(label = dataset), label.fontsize = 8, label.fontface = "plain", con.type = "straight", con.cap = 0.5, show.legend = FALSE)
ipsc_mn_als_datasets.polyA.pca_dataset

Volcano

ipsc_mn_als_datasets.polyA.als.labels <- ipsc_mn_als_datasets.polyA.res %>% filter(padj < 0.05, gene_name %in% c(ALS_genes, RNA_BINDING_PROTEINS, p53.signalling.pathway, gprofiler_database$`DNA Damage Response`)) %>% distinct(gene_name) %>% pull(gene_name)
ipsc_mn_als_datasets.polyA.gene.labels <- ipsc_mn_als_datasets.polyA.res %>% filter(!grepl("^AC|^AL", gene_name)) %>% top_n(20, wt = -pvalue) %>% pull(gene_name)
ipsc_mn_als_datasets.polyA.volcano <- volcano_plot(ipsc_mn_als_datasets.polyA.res, numerator = "ALS", denomenator = "CTRL",
               title = "polyA", subtitle = "43 Controls vs 48 ALS", ymax = 10, xmax = 3, dpi = 72, gene_labels = c(ipsc_mn_als_datasets.polyA.als.labels, ipsc_mn_als_datasets.polyA.gene.labels,"SESN1","TCEAL6"), distinct_labels = TRUE)
ipsc_mn_als_datasets.polyA.volcano

DoRoTHEA & PROGENy

net_progeny <- get_progeny(organism = 'human', top = 100)
ipsc_mn_als_datasets.polyA.res.matrix <- ipsc_mn_als_datasets.polyA.res %>% select(gene_name, stat) %>%  drop_na(stat) %>% distinct(gene_name, .keep_all = TRUE) %>% tibble_to_matrix(stat, row_names = "gene_name")
ipsc_mn_als_datasets.polyA_progeny.contrast_acts <- run_wmean(mat=ipsc_mn_als_datasets.polyA.res.matrix, net=net_progeny, .source='source', .target='target', .mor='weight', times = 1000, minsize = 5) %>% filter(statistic == 'norm_wmean') %>% mutate(p.signif = case_when(p_value < 0.003 ~ "****", p_value < 0.009 ~ "***",p_value < 0.01 ~ "**",p_value < 0.05 ~ "*", TRUE ~ ""), group1 = source, group2 = source)
ipsc_mn_als_datasets.polyA_progeny.pathwayActivity <- ggplot(ipsc_mn_als_datasets.polyA_progeny.contrast_acts, aes(x=reorder(source, score), y = score)) +
    geom_bar(aes(fill = score), stat = "identity") +
    scale_fill_gradient2(low = "darkblue", high = "indianred", mid = "whitesmoke", midpoint = 0) + 
    theme_oz() + theme(axis.text.x = element_text(angle = 90), legend.position = "none", plot.title = element_text(hjust = 0.5)) +
    labs(title = "polyA: Signaling Pathways", x = "", y = "ALS vs CTRL enrichment") +
    stat_pvalue_manual(ipsc_mn_als_datasets.polyA_progeny.contrast_acts, label = "p.signif", y.position = 12, xmin = "source", xmax = NULL, size = 6, hide.ns = TRUE) 
ipsc_mn_als_datasets.polyA_progeny.pathwayActivity

net_dorothea <- get_dorothea(organism='human', levels=c('A', 'B', 'C'))
ipsc_mn_als_datasets.polyA_dorothea.contrast_acts <- run_wmean(mat=ipsc_mn_als_datasets.polyA.res.matrix, net=net_dorothea, .source='source', .target='target', .mor='mor', times = 1000, minsize = 5) %>% filter(statistic == 'norm_wmean') %>% 
  mutate(p.signif = case_when(p_value < 0.05 ~ "*", TRUE ~ ""), sig = case_when(p_value < 0.135 & abs(score) < 4 ~ "sig", p_value < 0.135 & abs(score) >= 4 ~ "sig_strong", p_value >= 0.135 ~ "non_sig"), direction = ifelse(score > 0, "up", "down"), class = paste(sig, direction))
ipsc_mn_als_datasets.polyA_dorothea.contrast_acts.labels.up = ipsc_mn_als_datasets.polyA_dorothea.contrast_acts %>% filter(p_value < 0.15) %>% top_n(score, n = 4) %>% pull(source)
ipsc_mn_als_datasets.polyA_dorothea.contrast_acts.labels.down = ipsc_mn_als_datasets.polyA_dorothea.contrast_acts %>% top_n(-score, n = 4) %>% pull(source)

# Volcano of TFs score vs p_value
ipsc_mn_als_datasets.polyA_dorothea.contrast_acts.volcano = ggplot(ipsc_mn_als_datasets.polyA_dorothea.contrast_acts, aes(x = score, y = -log10(p_value))) + geom_point(aes(colour = class)) +
    scale_colour_manual(values = c("non_sig up" = "gray", "non_sig down" = "gray",  "sig up" = "#F36F6F", "sig_strong up" = "#EB4445", "sig down" = "#A6CEE3", "sig_strong down" = "#79B1D3")) +
  labs(y = expression(-log[10]~P~value), x = "Normalised enrichment ALS vs CTRL", title = "polyA: Transcription Factors") +
    guides(colour = "none") +
    theme_oz() +  theme(plot.title = element_text(hjust = 0.5), plot.subtitle = element_text(hjust = 0.5), panel.border = element_blank(), axis.ticks = element_line(colour = "black") ) +
  geom_text_repel(fontface = "italic", data = filter(ipsc_mn_als_datasets.polyA_dorothea.contrast_acts, source %in% c(ipsc_mn_als_datasets.polyA_dorothea.contrast_acts.labels.up, ipsc_mn_als_datasets.polyA_dorothea.contrast_acts.labels.down)), aes(label = source), max.overlaps = Inf, min.segment.length = unit(0, "lines"), size = 2.3) +  
  geom_vline(xintercept = 0, linetype = 'dotted') + ylim(c(0,2.8))
ipsc_mn_als_datasets.polyA_dorothea.contrast_acts.volcano

Total

PCA

ipsc_mn_als_datasets.total.pcaData <- plotPCA(ipsc_mn_als_datasets.total.vsd, intgroup=c("sample", "dataset", "database_dir", "condition", "mutation", "dataset_sample", "strandedness", "onset_site", "study_accession", "DIV", "instrument","library_type"), returnData=TRUE) %>% as_tibble() %>%
  mutate(dataset = as.character(dataset), dataset = gsub("dafianca","dafinca",dataset), dataset_sample = gsub("dafianca","dafinca",dataset_sample), dataset = case_when(dataset == "neurolincs0" & grepl("^0007",sample) ~ "neurolincs_diMN", dataset == "neurolincsA" & grepl("^A-042",sample) ~ "neurolincs_iMN", dataset == "answerals" ~ "AnswerALS",  dataset %in% c("dafinca.c9orf72","dafinca.tardbp") ~ "dafinca", TRUE ~  dataset), 
         DIV = as.character(DIV), DIV = case_when(is.na(DIV) ~ "30", TRUE ~  DIV), database_dir = gsub("/camp/lab/patanir/home/shared/projects/patani-collab/public-data","/camp/project/proj-luscombn-patani/working/public-data",database_dir),
         library_type = case_when(dataset %in% c("catanese","dafinca","kapeli","kiskinis","luisier","mitchell","sareen","sommer","smith","sterneckert","wang") ~ "poly(A)", TRUE ~ "Total")) %>%
  left_join(select(filter(ipsc_mn_als_datasets.metadata, library_type == "Total"), dataset_sample, gender)) 

ipsc_mn_als_datasets.total.pca_dataset <- ggplot(ipsc_mn_als_datasets.total.pcaData, aes(PC1, PC2, color=dataset)) + geom_point(size=1) + 
  theme_oz() + theme(legend.text=element_text(size=8), legend.title=element_text(size=7), legend.position = "top", legend.spacing = unit(0.01, 'cm')) + guides(color=guide_legend(nrow=2, keywidth=0.1, keyheight=0.1, default.unit="inch")) +
  xlab(paste0("PC1: ",round(100 * attr(ipsc_mn_als_datasets.total.pcaData, "percentVar"))[1],"% variance")) +  ylab(paste0("PC2: ",round(100 * attr(ipsc_mn_als_datasets.total.pcaData, "percentVar"))[2],"% variance"))  + labs(color="Total\ndatasets") +
    ggforce::geom_mark_ellipse(aes(label = dataset), label.fontsize = 8, label.fontface = "plain", con.type = "straight", con.cap = 0.5, show.legend = FALSE)
ipsc_mn_als_datasets.total.pca_dataset

Volcano

ipsc_mn_als_datasets.total.als.labels <- ipsc_mn_als_datasets.total.res %>% filter(padj < 0.05, gene_name %in% c(ALS_genes, RNA_BINDING_PROTEINS, p53.signalling.pathway, gprofiler_database$`DNA Damage Response`)) %>% distinct(gene_name) %>% pull(gene_name)
ipsc_mn_als_datasets.total.gene.labels <- ipsc_mn_als_datasets.total.res %>% filter(!grepl("^AC|^AL", gene_name)) %>% top_n(20, wt = -pvalue) %>% pull(gene_name)
ipsc_mn_als_datasets.total.volcano <- volcano_plot(ipsc_mn_als_datasets.total.res, numerator = "ALS", denomenator = "CTRL",
               title = "Total", subtitle = "63 Controls vs 275 ALS", ymax = 8, xmax = 3, dpi = 72, gene_labels = c(ipsc_mn_als_datasets.total.als.labels, ipsc_mn_als_datasets.total.gene.labels,"RNASEL","SESN1", "TCEAL6","MTCO1P12","MTCO2P12","LMO4"))
ipsc_mn_als_datasets.total.volcano

DoRoTHEA & PROGENy

ipsc_mn_als_datasets.total.res.matrix <- ipsc_mn_als_datasets.total.res %>% select(gene_name, stat) %>%  drop_na(stat) %>% distinct(gene_name, .keep_all = TRUE) %>% tibble_to_matrix(stat, row_names = "gene_name")
ipsc_mn_als_datasets.total_progeny.contrast_acts <- run_wmean(mat=ipsc_mn_als_datasets.total.res.matrix, net=net_progeny, .source='source', .target='target', .mor='weight', times = 1000, minsize = 5) %>% filter(statistic == 'norm_wmean') %>% mutate(p.signif = case_when(p_value < 0.003 ~ "****", p_value < 0.009 ~ "***",p_value < 0.01 ~ "**",p_value < 0.05 ~ "*", TRUE ~ ""), group1 = source, group2 = source)
ipsc_mn_als_datasets.total_progeny.pathwayActivity <- ggplot(ipsc_mn_als_datasets.total_progeny.contrast_acts, aes(x=reorder(source, score), y = score)) +
    geom_bar(aes(fill = score), stat = "identity") +
    scale_fill_gradient2(low = "darkblue", high = "indianred", mid = "whitesmoke", midpoint = 0) + 
    theme_oz() + theme(axis.text.x = element_text(angle = 90), legend.position = "none", plot.title = element_text(hjust = 0.5)) +
    labs(title = "Total: Signaling Pathways", x = "", y = "ALS vs CTRL enrichment") +
    stat_pvalue_manual(ipsc_mn_als_datasets.total_progeny.contrast_acts, label = "p.signif", y.position = 8, xmin = "source", xmax = NULL, size = 6, hide.ns = TRUE) 
ipsc_mn_als_datasets.total_progeny.pathwayActivity

ipsc_mn_als_datasets.total_dorothea.contrast_acts <- run_wmean(mat=ipsc_mn_als_datasets.total.res.matrix, net=net_dorothea, .source='source', .target='target', .mor='mor', times = 1000, minsize = 5) %>% filter(statistic == 'norm_wmean') %>% 
  mutate(p.signif = case_when(p_value < 0.05 ~ "*", TRUE ~ ""), sig = case_when(p_value < 0.135 & abs(score) < 4 ~ "sig", p_value < 0.135 & abs(score) >= 4 ~ "sig_strong", p_value >= 0.135 ~ "non_sig"), direction = ifelse(score > 0, "up", "down"), class = paste(sig, direction))
ipsc_mn_als_datasets.total_dorothea.contrast_acts.labels.up = ipsc_mn_als_datasets.total_dorothea.contrast_acts %>% top_n(score, n = 5) %>% pull(source)
ipsc_mn_als_datasets.total_dorothea.contrast_acts.labels.down = ipsc_mn_als_datasets.total_dorothea.contrast_acts %>% top_n(-score, n = 5) %>% pull(source)
# Volcano of TFs score vs p_value
ipsc_mn_als_datasets.total_dorothea.contrast_acts.volcano = ggplot(ipsc_mn_als_datasets.total_dorothea.contrast_acts, aes(x = score, y = -log10(p_value))) + geom_point(aes(colour = class)) +
    scale_colour_manual(values = c("non_sig up" = "gray", "non_sig down" = "gray",  "sig up" = "#F36F6F", "sig_strong up" = "#EB4445", "sig down" = "#A6CEE3", "sig_strong down" = "#79B1D3")) +
  labs(y = expression(-log[10]~P~value), x = "Normalised enrichment ALS vs CTRL", title = "Total: Transcription Factors") +
    guides(colour = "none") +
    theme_oz() +  theme(plot.title = element_text(hjust = 0.5), plot.subtitle = element_text(hjust = 0.5), panel.border = element_blank(), axis.ticks = element_line(colour = "black") ) +
  # scale_y_continuous(expand = c(0,0), limits = c(0,ymax)) + scale_x_continuous(limits = c(-xmax,xmax))
  geom_text_repel(fontface = "italic", data = filter(ipsc_mn_als_datasets.total_dorothea.contrast_acts, source %in% c(ipsc_mn_als_datasets.total_dorothea.contrast_acts.labels.up, ipsc_mn_als_datasets.total_dorothea.contrast_acts.labels.down, "TP53")), aes(label = source), max.overlaps = Inf, min.segment.length = unit(0, "lines"), size = 2.3) +  
  geom_vline(xintercept = 0, linetype = 'dotted') + ylim(c(0,2.8))
ipsc_mn_als_datasets.total_dorothea.contrast_acts.volcano

Compare polyA & total

# # Scatter correlation
ipsc_mn_als_datasets.polyA_total.labels = select(ipsc_mn_als_datasets.total.res, gene_id, gene_name, polyA.stat = stat) %>% left_join(select(ipsc_mn_als_datasets.polyA.res, gene_id, gene_name, total.stat = stat)) %>% filter((polyA.stat > 2.5 & total.stat > 2.5) | (polyA.stat < -2.5 & total.stat < -2.5))
ipsc_mn_als_datasets.polyA_total.list = list("polyA: ALS vs CTRL" = ipsc_mn_als_datasets.polyA.res, "Total: ALS vs CTRL" = ipsc_mn_als_datasets.total.res)
ipsc_mn_als_datasets.polyA_total_stat.corr = plot_de_correlation(model_list1 = ipsc_mn_als_datasets.polyA_total.list, model_list2 = ipsc_mn_als_datasets.polyA_total.list, mutation1 = "polyA: ALS vs CTRL", mutation2 = "Total: ALS vs CTRL", gene_labels = pull(ipsc_mn_als_datasets.polyA_total.labels,gene_name), plot_corr_line = TRUE, plot_p_value = TRUE)
ipsc_mn_als_datasets.polyA_total_stat.corr

Fig S10 Datasets independently

load(here(proj_path, "expression/deseq2/datasets_separated.RData"))

Volcano

ipsc_mn_datasets_list.res = list("AnswerALS" = answerals$res, "neurolincs.diMN" = neurolincs.diMN$res, "neurolincs.iMN" = neurolincs.iMN$res, "catanese" = catanese$res, "dafinca C9orf72" = dafinca.c9orf72$res, "dafinca tardbp" = dafinca.tardbp$res, "luisier" = luisier$res, "wang" = wang$res, "kiskinis" = kiskinis$res, "sareen" = sareen$res, "sommer" = sommer$res, "sterneckert" = sterneckert$res, "kapeli" = kapeli$res, "desantis" = desantis$res, "smith" = smith$res, "bhinge" = bhinge$res, "hawkins" = hawkins$res)
ipsc_mn_datasets.res = bind_rows(ipsc_mn_datasets_list.res, .id = "dataset")

datasets_separated.volcano = volcano_plot(ipsc_mn_datasets.res, arrow_labels = FALSE, dpi = 300) + facet_wrap(~dataset, scales = "free", ncol = 6)
datasets_separated.volcano

Correlation heatmap

# heatmap of correlation coefficients
ipsc_mn_datasets_stat = select(answerals$res, gene_id, answerals = stat) %>% 
  left_join(select(neurolincs.diMN$res, gene_id, neurolincs.diMN = stat)) %>%
  left_join(select(neurolincs.iMN$res, gene_id, neurolincs.iMN = stat)) %>%
  left_join(select(catanese$res, gene_id, catanese = stat)) %>%
  left_join(select(dafinca.c9orf72$res, gene_id, dafinca.c9orf72 = stat)) %>%
  left_join(select(dafinca.tardbp$res, gene_id, dafinca.tardbp = stat)) %>%
  left_join(select(luisier$res, gene_id, luisier = stat)) %>%
  left_join(select(wang$res, gene_id, wang = stat)) %>%
  left_join(select(kiskinis$res, gene_id, kiskinis = stat)) %>%
  left_join(select(sareen$res, gene_id, sareen = stat)) %>%
  left_join(select(sommer$res, gene_id, sommer = stat)) %>%
  left_join(select(sterneckert$res, gene_id, sterneckert = stat)) %>%
  left_join(select(kapeli$res, gene_id, kapeli = stat)) %>%
  left_join(select(desantis$res, gene_id, desantis = stat)) %>%
  left_join(select(smith$res, gene_id, smith = stat)) %>%
  left_join(select(bhinge$res, gene_id, bhinge = stat)) %>%
  left_join(select(hawkins$res, gene_id, hawkins = stat)) %>%
  drop_na() %>%
  column_to_rownames("gene_id")
cormat <- round(cor(ipsc_mn_datasets_stat),2)
melted_cormat <- melt(cormat)
get_lower_tri<-function(cormat){ # Get lower triangle of the correlation matrix
  cormat[upper.tri(cormat)] <- NA
  return(cormat)
}
get_upper_tri <- function(cormat){ # Get upper triangle of the correlation matrix
  cormat[lower.tri(cormat)]<- NA
  return(cormat)
  }
upper_tri <- get_upper_tri(cormat)
melted_cormat <- melt(upper_tri, na.rm = TRUE)
reorder_cormat <- function(cormat){ # Use correlation between variables as distance
  dd <- as.dist((1-cormat)/2)
  hc <- hclust(dd)
  cormat <-cormat[hc$order, hc$order]
}
cormat <- reorder_cormat(cormat)
upper_tri <- get_upper_tri(cormat)
melted_cormat <- melt(upper_tri, na.rm = TRUE) # Melt the correlation matrix
dataset_dge_correlation_ggheatmap <- ggplot(melted_cormat, aes(Var2, Var1, fill = value))+
  geom_tile(color = "white") +  scale_fill_gradient2(low = "blue", high = "red", mid = "white", midpoint = 0, limit = c(-1,1), space = "Lab", name="Pearson\nCorrelation") + 
  geom_text(aes(Var2, Var1, label = value), color = "black", size = 3)  +
  theme_minimal() + theme(axis.title.x = element_blank(), axis.title.y = element_blank(), panel.grid.major = element_blank(), panel.border = element_blank(), panel.background = element_blank(), axis.ticks = element_blank(),
                          legend.position = c(0.25,0.75), legend.direction = "horizontal", legend.title = element_text(size = 8), axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1), axis.text=element_text(size=7.5)) + # coord_fixed()+ 
  guides(fill = guide_colorbar(barwidth = 7, barheight = 1, title.position = "top", title.hjust = 0.5))
dataset_dge_correlation_ggheatmap

p53 PROGENy & TP53 Dorothea

ipsc_mn_datasets_list = list("AnswerALS" = select(answerals$res,gene_name,stat), "neurolincs.diMN" = select(neurolincs.diMN$res,gene_name,stat), "neurolincs.iMN" = select(neurolincs.iMN$res,gene_name,stat), "catanese" = select(catanese$res,gene_name,stat), "dafinca C9orf72" = select(dafinca.c9orf72$res,gene_name,stat), "dafinca tardbp" = select(dafinca.tardbp$res,gene_name,stat), "luisier" = select(luisier$res,gene_name,stat), "wang" = select(wang$res,gene_name,stat), "kiskinis" = select(kiskinis$res,gene_name,stat), "sareen" = select(sareen$res,gene_name,stat), "sommer" = select(sommer$res,gene_name,stat), "sterneckert" = select(sterneckert$res,gene_name,stat), "kapeli" = select(kapeli$res,gene_name,stat), "desantis" = select(desantis$res,gene_name,stat), "smith" = select(smith$res,gene_name,stat), "bhinge" = select(bhinge$res,gene_name,stat), "hawkins" = select(hawkins$res,gene_name,stat))
datasets <- names(ipsc_mn_datasets_list)
ipsc_mn_datasets_list = map(ipsc_mn_datasets_list, ~{drop_na(.x)}) 
ipsc_mn_datasets_list = map(ipsc_mn_datasets_list, ~{distinct(.x, gene_name, .keep_all = TRUE)})
ipsc_mn_datasets_list = map(ipsc_mn_datasets_list, ~{tibble_to_matrix(.x, stat, row_names = "gene_name")})

# PROGENy
# net_progeny <- get_progeny(organism = 'human', top = 100)
ipsc_mn_datasets_list.progeny_scores = map_df(ipsc_mn_datasets_list, ~{run_wmean(.x, net=net_progeny, .source='source', .target='target', .mor='weight', times = 1000, minsize = 5)}, .id = "dataset") %>% filter(statistic == 'norm_wmean')

ipsc_mn_datasets_list.p53.progeny_scores = ipsc_mn_datasets_list.progeny_scores %>% filter(source == "p53") %>% mutate(p.signif = case_when(p_value < 0.003 ~ "****", p_value < 0.009 ~ "***",p_value < 0.01 ~ "**",p_value < 0.05 ~ "*", TRUE ~ ""), group1 = dataset, group2 = dataset)
ipsc_mn_datasets_list.p53.progeny_scores.plot <- ggplot(ipsc_mn_datasets_list.p53.progeny_scores, aes(x=reorder(dataset, score), y = score)) +
    geom_bar(aes(fill = score), stat = "identity") +
  scale_fill_gradient2(low = "darkblue", high = "indianred", mid = "whitesmoke", midpoint = 0) + #scale_fill_npg() +  
  theme_oz() + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1), plot.title = element_text(hjust = 0.5), legend.position = "none") +
  geom_hline(yintercept = 0, linetype = 3) +
  labs(title = "p53 pathway activity", x = "", y = "Normalised enrichment") + #
  stat_pvalue_manual(ipsc_mn_datasets_list.p53.progeny_scores, label = "p.signif", y.position = 11.5, xmin = "dataset", xmax = NULL, size = 4, hide.ns = TRUE) + ylim(c(-12,12))
ipsc_mn_datasets_list.p53.progeny_scores.plot

# Dorothea
# net_dorothea <- get_dorothea(organism='human', levels=c('A', 'B', 'C'))
ipsc_mn_datasets_list.dorothea_scores = map_df(ipsc_mn_datasets_list, ~{run_wmean(.x, net=net_dorothea, .source='source', .target='target', .mor='mor', times = 1000, minsize = 5)}, .id = "dataset") %>% filter(statistic == 'norm_wmean')

ipsc_mn_datasets_list.TP53.dorothea_scores = ipsc_mn_datasets_list.dorothea_scores %>% filter(source == "TP53") %>% mutate(p.signif = case_when(p_value < 0.003 ~ "****", p_value < 0.009 ~ "***",p_value < 0.01 ~ "**",p_value < 0.05 ~ "*", TRUE ~ ""), group1 = dataset, group2 = dataset)
ipsc_mn_datasets_list.TP53.dorothea_scores.plot <- ggplot(ipsc_mn_datasets_list.TP53.dorothea_scores, aes(x=reorder(dataset, score), y = score)) +
    geom_bar(aes(fill = score), stat = "identity") +
  scale_fill_gradient2(low = "darkblue", high = "indianred", mid = "whitesmoke", midpoint = 0) + #scale_fill_npg() +  
  theme_oz() + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1), plot.title = element_text(hjust = 0.5), legend.position = "none") +
  geom_hline(yintercept = 0, linetype = 3) +
  labs(title = "TP53 Transcription Factor activity", x = "", y = "Normalised enrichment") +
  stat_pvalue_manual(ipsc_mn_datasets_list.TP53.dorothea_scores, label = "p.signif", y.position = 13, xmin = "dataset", xmax = NULL, size = 4, hide.ns = TRUE) + ylim(c(-13,13))
ipsc_mn_datasets_list.TP53.dorothea_scores.plot

Fig S11 Proteomics AnswerALS

answerals.proteomics.als_vs_ctrl.res = read_csv(here(shared_path,"/answerals/proteomics/answerals_proteomics_als_vs_ctrl.csv"))

Volcano & GO & GSEA

answerals.proteomics.als_vs_ctrl.volcano = volcano_plot(answerals.proteomics.als_vs_ctrl.res, title = "Protein Expression", subtitle = "171 ALS vs 33 CTRL", 
  gene_labels = c(pull(top_n(answerals.proteomics.als_vs_ctrl.res,n=10,wt=stat),gene_name), pull(filter(answerals.proteomics.als_vs_ctrl.res, pvalue < 0.05, gene_name %in% c(ALS_genes,p53.signalling.pathway)),gene_name), "TARDBP","FUS","SFPQ","SOD1", "BRD4", "EEF2"),
                  ymax = 4,xmax = 2,numerator = "ALS",denomenator = "CTRL", significance_threshold = "pvalue", dpi = 300)
answerals.proteomics.als_vs_ctrl.volcano

# GO terms
answerals.proteomics.als_vs_ctrl.gost <- answerals.proteomics.als_vs_ctrl.res %>% filter(pvalue < 0.05) %>% group_split(log2FoldChange > 0) %>%  map("gene_id") %>% gost()
answerals.proteomics.als_vs_ctrl.gprofiles <- answerals.proteomics.als_vs_ctrl.gost$result %>% filter(!str_detect(source, "TF|CORUM|HPA")) %>% arrange( -log10(p_value) ) %>% mutate(query = case_when(query == "query_1" ~ "ALS\nDown", query == "query_2" ~ "ALS\nUp"), term_name = as.factor(term_name))
answerals.proteomics.als_vs_ctrl.gprofiles_down <- answerals.proteomics.als_vs_ctrl.gprofiles %>% filter(query == "ALS\nDown")
answerals.proteomics.als_vs_ctrl.gprofiles_up <- answerals.proteomics.als_vs_ctrl.gprofiles %>% filter(query == "ALS\nUp")
down_list <- c(
"Schaffer collateral - CA1 synapse",
"Golgi transport complex")
answerals.proteomics.als_vs_ctrl.gprofiles_filt_down <- answerals.proteomics.als_vs_ctrl.gprofiles_down %>% filter(query == "ALS\nDown")  %>% filter(term_name %in% down_list)
up_list <- c(
"proteasome complex",
"Axon guidance",
"nucleocytoplasmic transport",
"Cholesterol biosynthesis",
"Amyotrophic lateral sclerosis",
"protein modification process",
"Metabolism of proteins",
"Cellular responses to stress",
"protein localization",
"endoplasmic reticulum",
"protein binding",
"Metabolism of RNA",
"translation",
"protein metabolic process")
answerals.proteomics.als_vs_ctrl.gprofiles_filt_up <- answerals.proteomics.als_vs_ctrl.gprofiles_up %>% filter(query == "ALS\nUp")  %>% filter(term_name %in% up_list)
answerals.proteomics.als_vs_ctrl.gprofiles_filt_combined <- bind_rows(answerals.proteomics.als_vs_ctrl.gprofiles_filt_down, answerals.proteomics.als_vs_ctrl.gprofiles_filt_up) %>% mutate(query = factor(query, levels = c("ALS\nUp", "ALS\nDown")))
answerals.proteomics.als_vs_ctrl.go_curated <- curated_profiles(gprofiles = answerals.proteomics.als_vs_ctrl.gprofiles_filt_combined,label_angle = 0) + theme(legend.position = "none", axis.text = element_text(size=10))
answerals.proteomics.als_vs_ctrl.go_curated

## GSEA: DNA damage response, p53 signalling pathway
answerals.proteomics.als_vs_ctrl.geneList <- answerals.proteomics.als_vs_ctrl.res %>% drop_na(stat) %>% pull(stat)
names(answerals.proteomics.als_vs_ctrl.geneList) <- answerals.proteomics.als_vs_ctrl.res %>% drop_na(stat) %>% pull(gene_name) %>% as.character()
answerals.proteomics.als_vs_ctrl.geneList = sort(answerals.proteomics.als_vs_ctrl.geneList, decreasing = TRUE)

# p53 signalling pathway
p53.signalling_genes.list <- list("p53.signalling" = answerals.proteomics.als_vs_ctrl.res$gene_name[answerals.proteomics.als_vs_ctrl.res$gene_name %in% p53.signalling.pathway])
answerals.proteomics.als_vs_ctrl.p53.signalling_fgseaRes <- fgsea(pathways = p53.signalling_genes.list, stats = answerals.proteomics.als_vs_ctrl.geneList)
answerals.proteomics.als_vs_ctrl.p53.signalling_fgseaRes
##           pathway      pval      padj   log2err        ES     NES size
## 1: p53.signalling 0.4594017 0.4594017 0.0525899 0.2633203 1.00739  103
##                                leadingEdge
## 1: RBBP7,CSNK2B,PRMT1,CNOT9,RPL23,RRM2,...
# pathway      pval      padj    log2err        ES       NES size                             leadingEdge
# 1: p53.signalling 0.5122995 0.5122995 0.04763873 0.2633203 0.9831828  103 RBBP7,CSNK2B,PRMT1,CNOT9,RPL23,RRM2,...
answerals.proteomics.als_vs_ctrl.p53.signalling.gseaplot = plotEnrichment(p53.signalling_genes.list[["p53.signalling"]], answerals.proteomics.als_vs_ctrl.geneList) + 
  theme(axis.text=element_text(size=8), axis.title=element_text(size=10)) + ggtitle("p53 signalling") + xlab("") +
  annotate("text", x = 3000, y = 0.2, label = paste0("NES = ", round(answerals.proteomics.als_vs_ctrl.p53.signalling_fgseaRes$NES, digits = 3),"\nP = ", round(answerals.proteomics.als_vs_ctrl.p53.signalling_fgseaRes$pval, digits = 6)), size = 2.8, hjust = 0) + theme_oz()
answerals.proteomics.als_vs_ctrl.p53.signalling.gseaplot

Protein & RNA correlation

# Compare RNAseq to mass spec
ipsc_mn_rnaseq_mass_spec.join = select(answerals.proteomics.als_vs_ctrl.res, gene_name, gene_id, MS.lfc = log2FoldChange, MS.pvalue = pvalue, MS.padj = padj, MS.stat = stat) %>% 
  left_join(select(ipsc_mn_als_datasets.res, gene_name, gene_id, RNA.lfc = log2FoldChange, RNA.pvalue = pvalue, RNA.padj = padj, RNA.stat = stat))

# Correlate RNAseq & Mass Spec
ipsc_mn_rnaseq_mass_spec = list("RNA-seq: ALS vs CTRL" = ipsc_mn_als_datasets.res, "Mass Spec: ALS vs CTRL" = answerals.proteomics.als_vs_ctrl.res)
ipsc_mn_rnaseq_mass_spec.scatter = plot_de_correlation(model_list1 = ipsc_mn_rnaseq_mass_spec, model_list2 = ipsc_mn_rnaseq_mass_spec, mutation1 = "RNA-seq: ALS vs CTRL", mutation2 = "Mass Spec: ALS vs CTRL", col = "log2FoldChange", gene_labels = c("MACROD1","EEFSEC","EIF1AY","H3C4","CEMIP","PTGR1","HOXC8","KRT1","IGFBP5","TPX2", "TARDBP","FUS","SFPQ","SOD1"), plot_p_value = TRUE) & xlim(-0.9,0.6) & ylim(-1.8,1.9)
ipsc_mn_rnaseq_mass_spec.scatter