|
35 | 35 | }, |
36 | 36 | { |
37 | 37 | "cell_type": "code", |
38 | | - "execution_count": null, |
| 38 | + "execution_count": 1, |
39 | 39 | "id": "42f85552", |
40 | 40 | "metadata": {}, |
41 | 41 | "outputs": [], |
|
47 | 47 | }, |
48 | 48 | { |
49 | 49 | "cell_type": "code", |
50 | | - "execution_count": null, |
| 50 | + "execution_count": 2, |
51 | 51 | "id": "56e8e52f", |
52 | 52 | "metadata": {}, |
53 | | - "outputs": [], |
| 53 | + "outputs": [ |
| 54 | + { |
| 55 | + "name": "stdout", |
| 56 | + "output_type": "stream", |
| 57 | + "text": [ |
| 58 | + "Demography set to file /home/fcooper/GitRepos/PalamaraLab/PrepareDecoding/test/regression/input_CEU.demo\n", |
| 59 | + "Discretization file set to file /home/fcooper/GitRepos/PalamaraLab/PrepareDecoding/test/regression/input_30-100-2000.disc\n", |
| 60 | + "Frequencies file set to /home/fcooper/GitRepos/PalamaraLab/PrepareDecoding/test/regression/input_UKBB.frq\n", |
| 61 | + "New CSFS will be calculated\n", |
| 62 | + "Will read discretization intervals from /home/fcooper/GitRepos/PalamaraLab/PrepareDecoding/test/regression/input_30-100-2000.disc ...\n", |
| 63 | + "smcpp: init_cache() successful\n", |
| 64 | + "smcpp: storing cache: /tmp/smcpp_matrices.dat\n", |
| 65 | + "smcpp: store_cache() successful\n", |
| 66 | + "Will use minor allele frequencies from /home/fcooper/GitRepos/PalamaraLab/PrepareDecoding/test/regression/input_UKBB.frq ...\n", |
| 67 | + "Will use mutation rate mu = 1.65e-08.\n", |
| 68 | + "Number of samples in CSFS calculations: 50.\n", |
| 69 | + "Verified 69 CSFS entries.\n", |
| 70 | + "Probability of a site being monomorphic due to subsampling: 0.222911939663710\n", |
| 71 | + "\n", |
| 72 | + "Building decoding quantities...\n", |
| 73 | + "Genetic distances progress: 100%\t\n", |
| 74 | + "Physical distances progress: 100%\t\n", |
| 75 | + "Demography set to file /home/fcooper/GitRepos/PalamaraLab/PrepareDecoding/test/regression/input_CEU.demo\n", |
| 76 | + "Frequencies file set to /home/fcooper/GitRepos/PalamaraLab/PrepareDecoding/test/regression/input_UKBB.frq\n", |
| 77 | + "New CSFS will be calculated\n", |
| 78 | + "Using the following pre-specified discretization intervals: {0, 30, 60, 90, 120, 150, 180, 210, 240, 270, 300, 330, 360, 390, 420, 450, 550, 650, 750, 850, 950, 1050, 1150, 1250, 1350, 1450, 1550, 1650, 1750, 1850, 1950}\n", |
| 79 | + " and calculating 39 additional intervals from coalescent distribution.\n", |
| 80 | + "smcpp: init_cache() successful\n", |
| 81 | + "Will use minor allele frequencies from /home/fcooper/GitRepos/PalamaraLab/PrepareDecoding/test/regression/input_UKBB.frq ...\n", |
| 82 | + "Will use mutation rate mu = 1.65e-08.\n", |
| 83 | + "Number of samples in CSFS calculations: 50.\n", |
| 84 | + "Verified 70 CSFS entries.\n", |
| 85 | + "Probability of a site being monomorphic due to subsampling: 0.222911939663710\n", |
| 86 | + "\n", |
| 87 | + "Building decoding quantities...\n", |
| 88 | + "Genetic distances progress: 100%\t\n", |
| 89 | + "Physical distances progress: 100%\t\n", |
| 90 | + "Demography set to built-in CEU\n", |
| 91 | + "Frequencies file set to /home/fcooper/GitRepos/PalamaraLab/PrepareDecoding/test/regression/input_UKBB.frq\n", |
| 92 | + "New CSFS will be calculated\n", |
| 93 | + "Using the following pre-specified discretization intervals: {0, 30, 60, 90, 120, 150, 180, 210, 240, 270, 300, 330, 360, 390, 420, 450, 550, 650, 750, 850, 950, 1050, 1150, 1250, 1350, 1450, 1550, 1650, 1750, 1850, 1950}\n", |
| 94 | + " and calculating 39 additional intervals from coalescent distribution.\n", |
| 95 | + "smcpp: init_cache() successful\n", |
| 96 | + "Will use minor allele frequencies from /home/fcooper/GitRepos/PalamaraLab/PrepareDecoding/test/regression/input_UKBB.frq ...\n", |
| 97 | + "Will use mutation rate mu = 1.65e-08.\n", |
| 98 | + "Number of samples in CSFS calculations: 50.\n", |
| 99 | + "Verified 70 CSFS entries.\n", |
| 100 | + "Probability of a site being monomorphic due to subsampling: 0.222911939663710\n", |
| 101 | + "\n", |
| 102 | + "Building decoding quantities...\n", |
| 103 | + "Genetic distances progress: 100%\t\n", |
| 104 | + "Physical distances progress: 100%\t\n", |
| 105 | + "Demography set to built-in CEU\n", |
| 106 | + "Frequencies set to built-in UKBB with 50 samples\n", |
| 107 | + "New CSFS will be calculated\n", |
| 108 | + "Using the following pre-specified discretization intervals: {0, 30, 60, 90, 120, 150, 180, 210, 240, 270, 300, 330, 360, 390, 420, 450, 550, 650, 750, 850, 950, 1050, 1150, 1250, 1350, 1450, 1550, 1650, 1750, 1850, 1950}\n", |
| 109 | + " and calculating 39 additional intervals from coalescent distribution.\n", |
| 110 | + "smcpp: init_cache() successful\n", |
| 111 | + "Using built-in frequency information from UKBB ...\n", |
| 112 | + "Will use mutation rate mu = 1.65e-08.\n", |
| 113 | + "Number of samples in CSFS calculations: 50.\n", |
| 114 | + "Verified 70 CSFS entries.\n", |
| 115 | + "\n", |
| 116 | + "Building decoding quantities...\n", |
| 117 | + "Genetic distances progress: 100%\t\n", |
| 118 | + "Physical distances progress: 100%\t" |
| 119 | + ] |
| 120 | + } |
| 121 | + ], |
54 | 122 | "source": [ |
55 | 123 | "files_dir = (pathlib.Path('..') / 'test' / 'regression').resolve()\n", |
56 | 124 | "\n", |
|
104 | 172 | }, |
105 | 173 | { |
106 | 174 | "cell_type": "code", |
107 | | - "execution_count": null, |
| 175 | + "execution_count": 3, |
108 | 176 | "id": "e1d704d3", |
109 | 177 | "metadata": {}, |
110 | 178 | "outputs": [], |
|
133 | 201 | }, |
134 | 202 | { |
135 | 203 | "cell_type": "code", |
136 | | - "execution_count": null, |
| 204 | + "execution_count": 4, |
137 | 205 | "id": "84e495bb", |
138 | 206 | "metadata": {}, |
139 | | - "outputs": [], |
| 207 | + "outputs": [ |
| 208 | + { |
| 209 | + "name": "stdout", |
| 210 | + "output_type": "stream", |
| 211 | + "text": [ |
| 212 | + "\n", |
| 213 | + "Demography set to built-in CEU\n", |
| 214 | + "Frequencies set to built-in UKBB with 50 samples\n", |
| 215 | + "Precomputed CSFS will be loaded from file: /home/fcooper/GitRepos/PalamaraLab/PrepareDecoding/test/regression/output.csfs\n", |
| 216 | + "Using the following pre-specified discretization intervals: {0, 30, 60, 90, 120, 150, 180, 210, 240, 270, 300, 330, 360, 390, 420, 450, 550, 650, 750, 850, 950, 1050, 1150, 1250, 1350, 1450, 1550, 1650, 1750, 1850, 1950}\n", |
| 217 | + " and calculating 39 additional intervals from coalescent distribution.\n", |
| 218 | + "Using built-in frequency information from UKBB ...\n", |
| 219 | + "Will use mutation rate mu = 1.65e-08.\n", |
| 220 | + "Number of samples in CSFS calculations: 50.\n", |
| 221 | + "Verified 70 CSFS entries.\n", |
| 222 | + "\n", |
| 223 | + "Building decoding quantities...\n", |
| 224 | + "Genetic distances progress: 100%\t\n", |
| 225 | + "Physical distances progress: 100%\t" |
| 226 | + ] |
| 227 | + } |
| 228 | + ], |
140 | 229 | "source": [ |
141 | 230 | "dq = prepare_decoding(\n", |
142 | 231 | " csfs_file=str(files_dir / 'output.csfs'),\n", |
|
157 | 246 | }, |
158 | 247 | { |
159 | 248 | "cell_type": "code", |
160 | | - "execution_count": null, |
| 249 | + "execution_count": 5, |
161 | 250 | "id": "6ecc7fd7", |
162 | 251 | "metadata": {}, |
163 | | - "outputs": [], |
| 252 | + "outputs": [ |
| 253 | + { |
| 254 | + "data": { |
| 255 | + "text/plain": [ |
| 256 | + "{'states': 70, 'samples': 50, 'mu': 1.65e-08}" |
| 257 | + ] |
| 258 | + }, |
| 259 | + "execution_count": 5, |
| 260 | + "metadata": {}, |
| 261 | + "output_type": "execute_result" |
| 262 | + } |
| 263 | + ], |
164 | 264 | "source": [ |
165 | 265 | "{\"states\": dq.states, \"samples\": dq.samples, \"mu\": dq.mu}" |
166 | 266 | ] |
|
175 | 275 | }, |
176 | 276 | { |
177 | 277 | "cell_type": "code", |
178 | | - "execution_count": null, |
| 278 | + "execution_count": 6, |
179 | 279 | "id": "fad47cb3", |
180 | 280 | "metadata": {}, |
181 | | - "outputs": [], |
| 281 | + "outputs": [ |
| 282 | + { |
| 283 | + "data": { |
| 284 | + "text/plain": [ |
| 285 | + "(numpy.ndarray, (2, 70))" |
| 286 | + ] |
| 287 | + }, |
| 288 | + "execution_count": 6, |
| 289 | + "metadata": {}, |
| 290 | + "output_type": "execute_result" |
| 291 | + } |
| 292 | + ], |
182 | 293 | "source": [ |
183 | 294 | "X = dq.compressedEmission\n", |
184 | 295 | "type(X), X.shape" |
|
194 | 305 | }, |
195 | 306 | { |
196 | 307 | "cell_type": "code", |
197 | | - "execution_count": null, |
| 308 | + "execution_count": 7, |
198 | 309 | "id": "aacfc6cd", |
199 | 310 | "metadata": {}, |
200 | | - "outputs": [], |
| 311 | + "outputs": [ |
| 312 | + { |
| 313 | + "data": { |
| 314 | + "text/plain": [ |
| 315 | + "70" |
| 316 | + ] |
| 317 | + }, |
| 318 | + "execution_count": 7, |
| 319 | + "metadata": {}, |
| 320 | + "output_type": "execute_result" |
| 321 | + } |
| 322 | + ], |
201 | 323 | "source": [ |
202 | 324 | "len([x for x in dq.CSFS])" |
203 | 325 | ] |
204 | 326 | }, |
205 | 327 | { |
206 | 328 | "cell_type": "code", |
207 | | - "execution_count": null, |
| 329 | + "execution_count": 8, |
208 | 330 | "id": "2a1689b4", |
209 | 331 | "metadata": {}, |
210 | | - "outputs": [], |
| 332 | + "outputs": [ |
| 333 | + { |
| 334 | + "data": { |
| 335 | + "text/plain": [ |
| 336 | + "(3, 49)" |
| 337 | + ] |
| 338 | + }, |
| 339 | + "execution_count": 8, |
| 340 | + "metadata": {}, |
| 341 | + "output_type": "execute_result" |
| 342 | + } |
| 343 | + ], |
211 | 344 | "source": [ |
212 | 345 | "c0 = dq.CSFS[0]\n", |
213 | 346 | "{\"mu\": c0.mu, \"from\": c0.csfsFrom, \"to\": c0.csfsTo, \"samples\": c0.samples}\n", |
|
231 | 364 | "name": "python", |
232 | 365 | "nbconvert_exporter": "python", |
233 | 366 | "pygments_lexer": "ipython3", |
234 | | - "version": "3.8.10" |
| 367 | + "version": "3.11.11" |
235 | 368 | } |
236 | 369 | }, |
237 | 370 | "nbformat": 4, |
|
0 commit comments