-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathMakefile
More file actions
246 lines (204 loc) · 8.38 KB
/
Makefile
File metadata and controls
246 lines (204 loc) · 8.38 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
# ===============================
# LoCo C++ Project Makefile
# Dependencies: nanoflann & igraph, boost, gfortran, arpack
# ===============================
# Directories
SRC_DIR = src
BUILD_DIR = build
BIN_DIR = bin
TOOL_DIR = tools
LIB = src/lib
INCLUDE_DIRS := -Iinclude -I$(SRC_DIR)
# Include path of the nanoflann headers
NANO_INCLUDE = -Iinst/include

#######################################
# PLATFORM DETECTION
#######################################
# Exactly one of IS_LINUX / IS_DARWIN / IS_WIN is non-empty on a supported host.
# These must be defined BEFORE any ifeq/ifneq that tests them: make evaluates
# conditionals while it parses the file, so a variable that is only assigned
# further down is still empty at the point where the conditional is read.
UNAME_S := $(shell uname -s)
IS_LINUX := $(filter Linux,$(UNAME_S))
IS_DARWIN := $(filter Darwin,$(UNAME_S))
IS_WIN := $(filter MINGW% MSYS% CYGWIN%,$(UNAME_S))

# Compiler flags. Kept recursive (=) so that anything appended to
# INCLUDE_DIRS later in the file is still picked up when a recipe expands it.
CXXFLAGS = -std=c++17 -O3 -Wall -Wextra $(INCLUDE_DIRS) $(NANO_INCLUDE)
# add LTO only for Linux/Mac
ifneq ($(IS_LINUX),)
  CXXFLAGS += -flto=5
else ifneq ($(IS_DARWIN),)
  CXXFLAGS += -flto
endif

# Source files: every .cpp below $(SRC_DIR) except the two R binding files,
# plus the command-line entry point in $(TOOL_DIR).
SRC_FILES := $(shell find $(SRC_DIR) -name "*.cpp" \
               ! -name "loco_R.cpp" \
               ! -name "RcppExports.cpp")
SRC_FILES += $(TOOL_DIR)/LoCo.cpp
# Object files mirror the source tree below $(BUILD_DIR).
OBJ_FILES := $(patsubst %.cpp,$(BUILD_DIR)/%.o,$(SRC_FILES))
#######################################
# BOOST HANDLING (ROBUST, CROSS-PLATFORM)
#######################################
# Start from empty values; each platform branch fills in what it needs.
BOOST_FLAGS   :=
BOOST_INCLUDE :=
BOOST_LIB     :=

ifneq ($(IS_LINUX),)
  #######################################
  # Linux: only the libraries are named, no extra search paths are added
  #######################################
  BOOST_FLAGS := -lboost_program_options -lpthread -lz
else ifneq ($(IS_DARWIN),)
  #######################################
  # macOS: ask Homebrew for the Boost prefix, fall back to /opt/homebrew
  #######################################
  BOOST_PREFIX  := $(shell brew --prefix boost 2>/dev/null || echo /opt/homebrew)
  BOOST_INCLUDE := $(BOOST_PREFIX)/include
  BOOST_LIB     := $(BOOST_PREFIX)/lib
  BOOST_FLAGS   := -I$(BOOST_INCLUDE) -L$(BOOST_LIB) -lboost_program_options -lpthread -lz
endif
#######################################
# Windows (vcpkg, static, ROBUST) and final linker flags
#######################################
ifneq ($(IS_WIN),)
  # Location of the vcpkg checkout; can be overridden from the environment
  # or the command line.
  VCPKG_ROOT    ?= C:/vcpkg
  BOOST_TRIPLET := x64-mingw-static
  BOOST_INCLUDE := $(VCPKG_ROOT)/installed/$(BOOST_TRIPLET)/include
  BOOST_LIB     := $(VCPKG_ROOT)/installed/$(BOOST_TRIPLET)/lib

  # boost_file(<component>): file name of the first libboost_<component>*.a
  # found in $(BOOST_LIB), or empty when there is none. The archive name is
  # looked up instead of being hard-coded.
  boost_file    = $(notdir $(firstword $(wildcard $(BOOST_LIB)/libboost_$(1)*.a)))
  BOOST_PO_FILE = $(call boost_file,program_options)
  # -l:<file> links the archive by its exact file name; nothing is added when
  # the archive was not found.
  BOOST_LIBS    := $(if $(BOOST_PO_FILE),-l:$(BOOST_PO_FILE))

  BOOST_FLAGS = -I"$(BOOST_INCLUDE)" -L"$(BOOST_LIB)" -Wl,-Bstatic $(BOOST_LIBS) -lz -lwinpthread -lws2_32

  # Windows builds are linked fully statically.
  LDFLAGS := $(BOOST_FLAGS) -static -static-libgcc -static-libstdc++
else
  LDFLAGS := $(BOOST_FLAGS)
endif
#######################################
# FINAL BOOST FLAGS (USED BY COMPILER)
#######################################
# Append -Isrc and, when BOOST_INCLUDE is non-empty, the Boost include
# directory to the compiler include flags. BOOST_INCLUDE is only set by the
# macOS and Windows branches, so this matters for those two platforms.
INCLUDE_DIRS += -Isrc $(if $(BOOST_INCLUDE),-I$(BOOST_INCLUDE))
#######################################
# INSTALL DEPENDENCIES (SYSTEM-DEPENDENT)
#######################################
# Installs Boost.Program_options and zlib with the package manager of the
# detected platform, then creates the build and binary directories.
#
#   Linux   : apt-get (uses sudo)
#   macOS   : Homebrew (a failing `brew install` is tolerated)
#   Windows : vcpkg from $(VCPKG_ROOT), triplet x64-mingw-static
#
# nanoflann is included in the repository as a header-only library, so it is
# not fetched here. The clone that was used originally is kept for reference:
#   mkdir -p inst/include
#   cd inst/include
#   if [ ! -d nanoflann ]; then
#     git clone https://github.com/jlblancoc/nanoflann --branch v1.3.2
#   else
#     echo "nanoflann already exists, skipping clone"
#   fi
install:
	@case "$(UNAME_S)" in \
	  Linux) \
	    echo "Installing dependencies (Linux)..."; \
	    sudo apt-get update && sudo apt-get install -y \
	      libboost-program-options-dev \
	      zlib1g-dev \
	    ;; \
	  Darwin) \
	    echo "Installing dependencies (macOS)..."; \
	    brew update; \
	    brew install boost zlib || true \
	    ;; \
	  *MINGW*|*MSYS*|*CYGWIN*) \
	    echo "Installing dependencies (Windows via vcpkg)..."; \
	    $(VCPKG_ROOT)/vcpkg install \
	      boost-program-options \
	      zlib \
	      --triplet x64-mingw-static \
	    ;; \
	esac
	mkdir -p $(BUILD_DIR)
	mkdir -p $(BIN_DIR)
	@echo "Dependencies installed."
# ===============================
# Build LoCo
# ===============================
# `make loco` links the command-line binary $(BIN_DIR)/loco.
# The link recipe belongs to the real output file, so it only runs when an
# object file is newer than the binary; `loco` itself is just an alias.
# $(LDFLAGS) carries the -l options and therefore follows the object files.
# $(BIN_DIR) is order-only: it has to exist, but its timestamp never forces
# a relink.
# NOTE(review): if the toolchain appends ".exe" to the output name, the file
# target will not be found and the link step runs on every invocation.
loco: $(BIN_DIR)/loco

$(BIN_DIR)/loco: $(OBJ_FILES) | $(BIN_DIR)
	$(CXX) $(OBJ_FILES) $(LDFLAGS) -o $@
# ===============================
# Compile sources to object files
# ===============================
# One pattern rule covers every entry of SRC_FILES (tools/LoCo.cpp included):
# <path>/<name>.cpp is compiled to $(BUILD_DIR)/<path>/<name>.o.
# -MMD -MP lets the compiler write a <name>.d file next to each object that
# lists the headers it included, so editing a header rebuilds the objects
# that depend on it.
$(BUILD_DIR)/%.o: %.cpp | $(BUILD_DIR)
	@mkdir -p $(@D)
	$(CXX) -MMD -MP -c $< -o $@ $(CXXFLAGS)

# Header dependencies generated by previous compiles; the leading '-' keeps
# make quiet when they do not exist yet (first build, or after `make clean`).
-include $(OBJ_FILES:.o=.d)
# ===============================
# Directories
# ===============================
# Output directories, created on demand. Other rules list them as
# order-only prerequisites.
$(BUILD_DIR) $(BIN_DIR):
	mkdir -p $@
# ===============================
# Clean
# ===============================
# None of these targets creates a file of the same name. Declaring them phony
# keeps them working even if a file or directory with such a name shows up in
# the project root.
.PHONY: clean install loco \
        test_loco test_loco_a test_loco_b test_loco_c \
        test_run_loco_sigmoidal test_loco_sigmoidal \
        test_loco_sigmoidal_granularities \
        test_loco_uniform test_loco_uniform_noSignalMarkers \
        test_corr_state_markers

# Remove everything the build produced (objects and binaries).
clean:
	rm -rf $(BUILD_DIR) $(BIN_DIR)
# Test A: the dataset has 4 correlated variables whose correlation ranges from
# low to high along the graph, 4 variables with a constant medium correlation
# and a number of uncorrelated variables.
# The dataset is rather small and noisy, so the results are not checked
# automatically. It is run once with 1 thread and once with 5 threads.
test_loco_a:
	./bin/loco -i ./test/simulatedData1.tsv -o bin/ -p test_a \
	    -n 20 -x 0.3 -s 25 -t 1 -q 1
	./bin/loco -i ./test/simulatedData1.tsv -o bin/ -p test_a \
	    -n 20 -x 0.3 -s 25 -t 5 -q 1
# Test B: simple case with 50 uncorrelated variables in two clusters; in each
# cluster a different set of 3 variables is correlated.
# Run it with clique printing enabled: each neighborhood should contain
# roughly ONLY 1,2,3 or 4,5,6.
# RESULT: this should always find the two correlation sets A,B,C and D,E,F.
test_loco_b:
	./bin/loco -i ./test/simulatedData2.tsv -o bin/ -p test_b -c \
	    -v ./test/cellStateGenes.txt -w ./test/cellSignalGenes.txt \
	    -t 10 -f 1
	./test/test_b.sh
# Test C: 1000 cells and 58 features.
#   - 20 features are completely random.
#   - 5 features (A,B,C,D,E) have a correlation (the same between all of them)
#     that rises from 0.1 up to 1, in 100 steps with 10 cells per step.
#   - Additionally 20 features have a constant correlation of 0.75. The cells
#     are ordered to follow the increasing correlation of A,B,C,D,E, so the ABs
#     with low correlation are also low in the 0.75 group, and as those values
#     increase (at unchanged correlation) the correlation of A,B,C,D,E increases.
# Idea: the constant 0.75 correlation gives the graph its structure, and along
# this structure the correlation of the first group should increase.
# THIS SHOULD DETECT ONLY THE FIRST 5 ABs; the other 20 are well correlated,
# but their correlation DOES NOT CHANGE along the cell manifold.
# The command below is run 5 times:
#   ./bin/loco -i ./test/simulatedData3.tsv -o bin -p test_c -c -n 20 -s 100 -x 0.5 -t 50 -q 2 -m 2 -a 0.01
# and we check that in 4 of the runs the top 5 correlations are pairs of
# A,B,C,D,E, that this set is reported every time, and that p-values are < 0.05.
test_loco_c:
	./test/test_c.sh
# Sigmoidal scenario: the signaling markers are simulated as markers with a
# sigmoidal activation.
# Note: increasing n to 100 makes everything significant.
test_run_loco_sigmoidal:
	./bin/loco -i ./test/data_1.tsv -o bin/ -p data_1 -c \
	    -n 100 -s 50 -x 0.4 -z 1 -t 50 -m 2 -q 2 -a 0.01 -u 1000
# Run the sigmoidal scenario 5 times and make sure that the top 5 correlation
# pairs only contain pairs of the middle program (Ms) or of an end program (Es).
test_loco_sigmoidal:
	./test/test_sigmoidal.sh
# Sigmoidal data set run with a list of values for -s.
# The list is single-quoted: an unquoted [10,50,100,200] is a shell glob
# (a bracket expression matching one character) and would be replaced by any
# file in the working directory whose name is one of those characters.
test_loco_sigmoidal_granularities:
	./bin/loco -i ./test/data_1.tsv -o bin/ -p data_1 -c \
	    -n 100 -s '[10,50,100,200]' -x 0.2 -z 1 -t 50 -m 2 -q 2 -a 0.01 -u 1000
# Disabled marker-file options, kept for reference:
#   -v ./test/paperCellstateMarkers.txt
#   -v ./test/paperCellstateMarkers.txt -w ./test/paperCellsignalMarkers.txt
# Uniform scenario: the signaling markers are simulated as uniform
# distributions. Both marker lists (-v and -w) are passed.
test_loco_uniform:
	./bin/loco -i ./test/data_2.tsv -o bin/ -p data_2 -c \
	    -n 1000 -s 100 -x 0.5 -z 1 -t 10 \
	    -v test/paperCellstateMarkers.txt -w test/paperCellsignalMarkers.txt
# Uniform scenario without the signal-marker list (only -v is passed).
# 5K cells; with more than 50 for N the p-values no longer seem to make sense.
test_loco_uniform_noSignalMarkers:
	./bin/loco -i ./test/data_2.tsv -o bin/ -p data_2_b -c \
	    -n 50 -s 100 -x 0.5 -z 1 -t 10 \
	    -v test/paperCellstateMarkers.txt
# Run loco on test/data3.tsv with both marker lists of that data set
# (-w data3_signalMarkers.txt, -v data3_stateMarkers.txt).
test_corr_state_markers:
	./bin/loco -i ./test/data3.tsv -o bin/ -p data3 -c \
	    -n 1 -s 200 -x 0.4 -a 0 -z 1 -t 10 \
	    -w test/data3_signalMarkers.txt -v test/data3_stateMarkers.txt
# Run the test scenarios one after another, each in its own sub-make, and stop
# at the first one that fails.
# $(MAKE) is used instead of a literal `make` so that the children use the
# same make program and inherit its command-line flags (-n, -j, variable
# overrides, ...).
# test_loco_uniform_noSignalMarkers is not part of this list.
test_loco:
	$(MAKE) test_loco_a
	$(MAKE) test_loco_b
	$(MAKE) test_loco_c
	$(MAKE) test_loco_sigmoidal
	$(MAKE) test_run_loco_sigmoidal
	$(MAKE) test_loco_uniform
	$(MAKE) test_loco_sigmoidal_granularities
	$(MAKE) test_corr_state_markers