-
Notifications
You must be signed in to change notification settings - Fork 6
Expand file tree
/
Copy pathClusteredHeatmapWithLineChartExample.R
More file actions
74 lines (50 loc) · 2.66 KB
/
ClusteredHeatmapWithLineChartExample.R
File metadata and controls
74 lines (50 loc) · 2.66 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
library(RColorBrewer)
library(ComplexHeatmap)
library(circlize)
# start with output from MSstats::DataProcess. In my case, mine is called 'dp.damgo.noBG' below
#####################
# prepare the data
#####################
# get a single quantity per protein/group
groupQuant <- MSstats::quantification(dp.morphine.noBG, type = "Group", format = "long")
setDT(groupQuant) # a memory efficient way to convert a data.frame to a data.table
# scaledIntensity has some advantages over LogIntensity:
# 1) Missing values can be assigned intensity = 0.0 with little impact on clustering
# 2) Linear scale is more intuitive for some people than a log scale
# 3) Low abundance proteins and high abundance proteins are treated equally
groupQuant[,scaledIntensity := 2^(LogIntensity - max(LogIntensity, na.rm=TRUE)), by = "Protein" ]
# covnert to wide format, then a matrix
quant.wide <- dcast (groupQuant, Protein~Group, value.var = "scaledIntensity")
quant.mat <- as.matrix(quant.wide, rownames = "Protein")
# this is important later for setting the missing values to gray in the heatmap
minNonMissing <- min (quant.mat, na.rm=TRUE)
# set missing values to 0.0
quant.mat[is.na(quant.mat)] <- 0.0
#################
# clustering
###################
# choose how many clusters you want, best is actually to try a few, here I just use 8
k <- 8
# kmeans depends on randomly choosing a starting point and iterating
# for repeatable results if/when I run this again, this initializes the random number generator
set.seed(1)
km.clusters <- kmeans(quant.mat, centers=k, nstart=100, iter.max=20)
##################
# Heatmap
#################
stopifnot(rownames(quant.mat) == names(km.clusters$cluster))
clusterColors <- brewer.pal(n = 12, name = "Paired")[km.clusters$cluster]
names(clusterColors) <- names(km.clusters$cluster)
cols <- brewer.pal(n = 12, name = "Paired")
names(cols) <- cols
clusterAnn <- rowAnnotation(cluster=clusterColors, col = list(cluster=cols))
topAnno <-HeatmapAnnotation(clusterCenters = anno_lines(t(km.clusters$centers),
height=unit(1, "inches"),
gp = gpar(col = cols)),
annotation_name_side = "left")
hms <- topAnno %v% Heatmap(quant.mat, name = "Scaled\nIntensity",
cluster_columns = FALSE, show_row_dend = FALSE, show_row_names=FALSE,
row_split = km.clusters$cluster,left_annotation = clusterAnn,
col = circlize::colorRamp2(c(0,minNonMissing * 1e-3, 0.5, 1.0),
c("gray", "blue", "white", "red")))
draw(hms)