-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathGO_function_sc.R
More file actions
134 lines (108 loc) · 5.7 KB
/
GO_function_sc.R
File metadata and controls
134 lines (108 loc) · 5.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
# for gene list
GO_function <- function(df,cell_type, pval = 0.05, onto= "MF", num_pathways=15, prefix="", org="mouse"){
# df <- deg_mouse
# cell_type = "Endothelial_cells"
# pval = 0.05
# onto= "BP"
# num_pathways=15
# prefix="LR_R_analysis/results/GO_analysis/Biological_Processes/Endothelial_cells"
# org="mouse"
require(clusterProfiler)
require(org.Mm.eg.db)
require(ReactomePA)
require(AnnotationDbi)
require(org.Hs.eg.db)
require(ggplot2)
# print message for cell type
message(paste0("Starting GO analysis for Cell Type : ", cell_type))
# get gene list, double check column names: gene (mouse), adjpvalue, cell_type
gene_list <- df$MGI.symbol[df$cell_type==cell_type & df$adjpvalue <=0.05] #filters gene list to only include DEGs of chosen cell type
#if no differential gene then give message
if(length(gene_list)==0){
message(paste0("No differential gene for Cell Type : ", cell_type))
message(paste0("****************************************************************************************"))
message(paste0("\n"))
}
if(org=="mouse"){
orgdb <- "org.Mm.eg.db"
org_reactome <- "mouse"
}else if(org=="human"){
orgdb <- "org.Hs.eg.db"
org_reactome <- "human"
}else{
message("Please enter a valid organism (mouse or human)")
}
if(onto %in% c("MF", "CC", "BP")){
compGO <- enrichGO(gene = gene_list, pvalueCutoff = pval,keyType = "SYMBOL",
pAdjustMethod = "BH",OrgDb = orgdb, ont = onto)
}else if(onto=="reactome"){
gene_list <- bitr(gene_list, fromType = "SYMBOL", toType = "ENTREZID", OrgDb = orgdb)
gene_list <- gene_list$ENTREZID
compGO <- enrichPathway(gene = gene_list, pvalueCutoff = 0.05, organism= org_reactome, readable = TRUE)
}else{
message("Please enter a valid GO term")
}
if(is.null(compGO)){
message(paste0("No GO:",onto, " obtained"))
message(paste0("****************************************************************************************"))
message(paste0("\n"))
}else {
compGO_df <- as.data.frame(compGO)
compGO_df$GeneRatio_decimal <- compGO_df$GeneRatio
compGO_df$GeneRatio_decimal <- sapply(compGO_df$GeneRatio_decimal,
function(x) (eval(parse(text = as.character(x)))))
compGO_df$BgRatio_decimal <- compGO_df$BgRatio
compGO_df$BgRatio_decimal <- sapply(compGO_df$BgRatio_decimal,
function(x) (eval(parse(text = as.character(x)))))
compGO_df <- compGO_df %>% tidyr::separate_rows(geneID, sep = "/", convert = FALSE) %>% #separate each gene within a pathway in individual rows
arrange(desc(GeneRatio_decimal))
compGO_df$Description <- factor(compGO_df$Description, levels=unique(compGO_df$Description)) #name of pathway
pathways <- unique(compGO_df$Description)
pathways_selected <- pathways[1:num_pathways] #select top 15 pathways
compGO_df %>% head
if(nrow(compGO_df)==0){
message(paste0("No GO:",onto, " obtained"))
message(paste0("****************************************************************************************"))
message(paste0("\n"))
} else{
write.csv(compGO_df, paste0(prefix,"_GO_",onto, "_pathways.csv"))
full_name= switch(onto,
MF= "Molecular Function",
CC= "Cellular Components",
BP= "Biological Pathways",
reactome= "Reactome Pathways"
)
compGO_df_subset <- compGO_df[compGO_df$Description %in% pathways_selected,] #subset to only include top 15 pathways
compGO_df_subset <- compGO_df_subset %>% arrange((GeneRatio_decimal)) #arrange by GeneRatio_decimal
compGO_df_subset$Description <- factor(compGO_df_subset$Description, levels=unique(compGO_df_subset$Description))
# print(head(compGO_df_subset))
p <- ggplot2::ggplot(data = (compGO_df_subset), # make ggplot object
aes(x = GeneRatio_decimal, y = Description, # x axis is GeneRatio_decimal, y axis is Description
color = p.adjust, size = Count)) + # color by p.adjust, size of dot by Count
geom_point(alpha = 0.5) + # add points with alpha 0.5
scale_color_gradient(low = "red", high ="blue" ,guide=guide_colourbar(reverse = TRUE)) + # set color gradient to blue to red
# facet_grid(vars(GO_class), scales = "free", space = "free_y") + # facet by GO_class with free scales and free y axis
theme_bw() + # set theme to black and white
labs(title = paste0("GO Pathway Enrichment Analysis \n", full_name, " for Cell Type: ", cell_type), x = "GeneRatio", y = "") + # set title and axis labels
theme(plot.title = element_text(hjust = 0.5), axis.text = element_text(size = 10), # set theme for plot title and axis text
axis.title.x = element_text(size = 10), axis.title.y = element_text(size = 10),
axis.text.x=element_text(colour="black"),
axis.text.y=element_text(colour="black")) +
# change color of Counts in legend to black
guides(size = guide_legend(override.aes = list(color = "black", alpha=1)))
print(p)
dev.copy(
pdf,
file = paste0(prefix,"_GO_",onto, "_pathways.pdf"),
width = 10,
height = 12
)
dev.off ()
message(paste0("Pathway analysis GO:",onto, " done"))
message(paste0("****************************************************************************************"))
message(paste0("\n"))
}
}
}
lapply(cell_types, function(x) GO_function(deg_mouse, x, pval = 0.05, onto= "BP",
prefix= paste0("LR_R_analysis/results/GO_analysis/Biological_Processes/", x), org="mouse"))