From 928c88a124de1d98390401c294912c9c257e2333 Mon Sep 17 00:00:00 2001 From: Anna Parker <50943381+anna-parker@users.noreply.github.com> Date: Thu, 28 Apr 2022 15:32:32 +0200 Subject: [PATCH 1/5] Update .gitignore --- .gitignore | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index a00a6c0..df93901 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,26 @@ -*profile* \ No newline at end of file +# Files generated by invoking Julia with --code-coverage +*.jl.cov +*.jl.*.cov + +# Files generated by invoking Julia with --track-allocation +*.jl.mem + +# System-specific files and directories generated by the BinaryProvider and BinDeps packages +# They contain absolute paths specific to the host computer, and so should not be committed +deps/deps.jl +deps/build.log +deps/downloads/ +deps/usr/ +deps/src/ + +# Build artifacts for creating documentation generated by the Documenter package +docs/build/ +docs/site/ + +# File generated by Pkg, the package manager, based on a corresponding Project.toml +# It records a fixed state of all packages used by the project. As such, it should not be +# committed for packages, but should be committed for applications that require a static +# environment. 
+Manifest.toml + +*profile* From f40721062795a8a8113cd13568eec85fa5867ac7 Mon Sep 17 00:00:00 2001 From: anna-parker Date: Thu, 28 Apr 2022 16:12:25 +0200 Subject: [PATCH 2/5] add functionalities for evolving sequences --- Project.toml | 1 + src/Evolve/Evolve.jl | 2 ++ src/Evolve/main.jl | 41 +++++++++++++++++++++++++++++++++++++++-- 3 files changed, 42 insertions(+), 2 deletions(-) diff --git a/Project.toml b/Project.toml index 64d983a..402d853 100644 --- a/Project.toml +++ b/Project.toml @@ -7,6 +7,7 @@ version = "0.1.0" BioSequences = "7e6ae17a-c86d-528c-b3b9-7f778a29fe59" BioSymbols = "3c28c6f8-a34d-59c4-9654-267d177fcfa9" Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" +FASTX = "c2308a5c-f048-11e8-3e8a-31650f418d12" LightGraphs = "093fc24a-ae57-5d10-9952-331d41423f4d" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" SubstitutionModels = "8365b1bb-bd83-58ee-a267-f2965fc81c73" diff --git a/src/Evolve/Evolve.jl b/src/Evolve/Evolve.jl index 868e1ca..99ac0c9 100644 --- a/src/Evolve/Evolve.jl +++ b/src/Evolve/Evolve.jl @@ -4,9 +4,11 @@ using BioSequences using StatsBase using SubstitutionModels using TreeTools +using FASTX export evolve! export compute_mutations! +export write_seq2fasta include("main.jl") include("mutations.jl") diff --git a/src/Evolve/main.jl b/src/Evolve/main.jl index 0a5a745..c3c3c77 100644 --- a/src/Evolve/main.jl +++ b/src/Evolve/main.jl @@ -1,26 +1,37 @@ """ evolve!(t::Tree, L::Int, μ=1.; model = JC69(1.), seqkey=:seq) + + initialize a random sequence of nucleotides of length `L` at the root, then simulate evolution + by changing nucleotides at random according to a probability distribution specified + by the chosen `model`, which takes the mutation rate μ and branch lengths of nodes as parameters. + + The sequence at a node `n` can be accessed by `n.data.dat[seqkey]`; whether a mutation has occurred + on the branch between node `n` and its ancestor is given as a boolean in `n.data.dat["evolved"]`. 
""" function evolve!(t::Tree, L::Int, μ=1.; model = JC69(1.), seqkey=:seq) t.root.data.dat[seqkey] = randseq(DNAAlphabet{4}(), L) + t.root.data.dat["evolved"]= false for c in t.root.child evolve!(c, model, μ, seqkey) end end function evolve!(n::TreeNode, model, μ, seqkey=:seq) + n.data.dat["evolved"]= false w = ProbabilityWeights(SubstitutionModels.P(model, μ * n.tau)[:,1], 1.) n.data.dat[seqkey] = deepcopy(n.anc.data.dat[seqkey]) - evolve!(n.data.dat[seqkey], w) + evolve_seq!(n, w, seqkey) for c in n.child evolve!(c, model, μ, seqkey) end end -function evolve!(seq, w) +function evolve_seq!(n::TreeNode, w, seqkey=:seq) + seq = n.data.dat[seqkey] for (i,nt) in enumerate(seq) newnt = sample(1:4, w) if newnt != 1 + n.data.dat["evolved"] = true if nt == DNA_A if newnt == 2; seq[i] = DNA_C elseif newnt == 3; seq[i] = DNA_G @@ -41,3 +52,29 @@ function evolve!(seq, w) end end end +""" + write_seq2fasta(t::Tree, fasta_name::String, output_dir::String, seqkey=:seq; only_terminals=false, remove_0_mutations=true) + + write evolved sequences to a fasta file with name `fasta_name`, `only_terminals` specifies if only terminal sequences should be + written to the file and `remove_0_mutations` if only sequences with mutations on their branches should be written to the file. 
+""" +function write_seq2fasta(t::Tree, fasta_name::String, output_dir::String, seqkey=:seq; only_terminals=false, remove_0_mutations=true) + mkpath(output_dir) + open(FASTA.Writer, output_dir * "sequences" *fasta_name* ".fasta") do w + if only_terminals + iter = POTleaves(t) + else + iter = POT(t) + end + for node in iter + if remove_0_mutations && !node.data.dat["evolved"] + continue + else + x = node.data.dat[seqkey] + rec = FASTA.Record(node.label, x) + write(w, rec) + end + end + end + +end From bd1825aaee568de43220d0d12e8162abfe46048c Mon Sep 17 00:00:00 2001 From: Anna Parker <50943381+anna-parker@users.noreply.github.com> Date: Thu, 28 Apr 2022 16:13:28 +0200 Subject: [PATCH 3/5] Delete Manifest.toml --- Manifest.toml | 449 -------------------------------------------------- 1 file changed, 449 deletions(-) delete mode 100644 Manifest.toml diff --git a/Manifest.toml b/Manifest.toml deleted file mode 100644 index d521f53..0000000 --- a/Manifest.toml +++ /dev/null @@ -1,449 +0,0 @@ -# This file is machine-generated - editing it directly is not advised - -[[ArgTools]] -uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f" - -[[ArnoldiMethod]] -deps = ["LinearAlgebra", "Random", "StaticArrays"] -git-tree-sha1 = "f87e559f87a45bece9c9ed97458d3afe98b1ebb9" -uuid = "ec485272-7323-5ecc-a04f-4719b315124d" -version = "0.1.0" - -[[Artifacts]] -uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" - -[[Automa]] -deps = ["Printf", "ScanByte", "TranscodingStreams"] -git-tree-sha1 = "d50976f217489ce799e366d9561d56a98a30d7fe" -uuid = "67c07d97-cdcb-5c2c-af73-a7f9c32a568b" -version = "0.8.2" - -[[Base64]] -uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" - -[[BioGenerics]] -deps = ["TranscodingStreams"] -git-tree-sha1 = "57deb413ca9f4c8bc7d4c6e98ebe217ff728c737" -uuid = "47718e42-2ac5-11e9-14af-e5595289c2ea" -version = "0.1.0" - -[[BioSequences]] -deps = ["BioGenerics", "BioSymbols", "Combinatorics", "IndexableBitVectors", "Printf", "Random", "StableRNGs", "Twiddle"] -git-tree-sha1 = 
"093ccb9211bdc71924abf8e74a0790af11da35a7" -uuid = "7e6ae17a-c86d-528c-b3b9-7f778a29fe59" -version = "2.0.5" - -[[BioSymbols]] -deps = ["Automa"] -git-tree-sha1 = "ec77888ac3e78f9d372c2b533bdb52668f9e2b09" -uuid = "3c28c6f8-a34d-59c4-9654-267d177fcfa9" -version = "4.0.4" - -[[ChainRulesCore]] -deps = ["Compat", "LinearAlgebra", "SparseArrays"] -git-tree-sha1 = "bd0cc939d94b8bd736dce5bbbe0d635db9f94af7" -uuid = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" -version = "0.9.41" - -[[CodeTracking]] -deps = ["InteractiveUtils", "UUIDs"] -git-tree-sha1 = "8ad457cfeb0bca98732c97958ef81000a543e73e" -uuid = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2" -version = "1.0.5" - -[[Combinatorics]] -git-tree-sha1 = "08c8b6831dc00bfea825826be0bc8336fc369860" -uuid = "861a8166-3701-5b0c-9a16-15d98fcdc6aa" -version = "1.0.2" - -[[Compat]] -deps = ["Base64", "Dates", "DelimitedFiles", "Distributed", "InteractiveUtils", "LibGit2", "Libdl", "LinearAlgebra", "Markdown", "Mmap", "Pkg", "Printf", "REPL", "Random", "SHA", "Serialization", "SharedArrays", "Sockets", "SparseArrays", "Statistics", "Test", "UUIDs", "Unicode"] -git-tree-sha1 = "0a817fbe51c976de090aa8c997b7b719b786118d" -uuid = "34da2185-b29b-5c13-b0c7-acf172513d20" -version = "3.28.0" - -[[CompilerSupportLibraries_jll]] -deps = ["Artifacts", "Libdl"] -uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae" - -[[Crayons]] -git-tree-sha1 = "3f71217b538d7aaee0b69ab47d9b7724ca8afa0d" -uuid = "a8cc5b0e-0ffa-5ad4-8c14-923d3ee1735f" -version = "4.0.4" - -[[DataAPI]] -git-tree-sha1 = "dfb3b7e89e395be1e25c2ad6d7690dc29cc53b1d" -uuid = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a" -version = "1.6.0" - -[[DataStructures]] -deps = ["Compat", "InteractiveUtils", "OrderedCollections"] -git-tree-sha1 = "4437b64df1e0adccc3e5d1adbc3ac741095e4677" -uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" -version = "0.18.9" - -[[Dates]] -deps = ["Printf"] -uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" - -[[Debugger]] -deps = ["CodeTracking", "Crayons", "Highlights", "InteractiveUtils", 
"JuliaInterpreter", "Markdown", "REPL"] -git-tree-sha1 = "7977a34a188da33b08ff8c0fd103f6f9f7da874d" -uuid = "31a5f54b-26ea-5ae9-a837-f05ce5417438" -version = "0.6.7" - -[[DelimitedFiles]] -deps = ["Mmap"] -uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab" - -[[Distributed]] -deps = ["Random", "Serialization", "Sockets"] -uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b" - -[[Distributions]] -deps = ["FillArrays", "LinearAlgebra", "PDMats", "Printf", "QuadGK", "Random", "SparseArrays", "SpecialFunctions", "Statistics", "StatsBase", "StatsFuns"] -git-tree-sha1 = "36aa8125c8caa6a449b54d5d45ac97afd392549d" -uuid = "31c24e10-a181-5473-b8eb-7969acd0382f" -version = "0.25.0" - -[[DocStringExtensions]] -deps = ["LibGit2", "Markdown", "Pkg", "Test"] -git-tree-sha1 = "9d4f64f79012636741cf01133158a54b24924c32" -uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" -version = "0.8.4" - -[[Downloads]] -deps = ["ArgTools", "LibCURL", "NetworkOptions"] -uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6" - -[[FASTX]] -deps = ["Automa", "BioGenerics", "BioSequences", "BioSymbols", "TranscodingStreams"] -git-tree-sha1 = "a980d6ac14c84c3ed17d0d07a0963a1c6d074b34" -uuid = "c2308a5c-f048-11e8-3e8a-31650f418d12" -version = "1.1.3" - -[[FastaIO]] -deps = ["GZip"] -git-tree-sha1 = "1656782ed1b7c3643c5196aadf8e5107569daaa0" -uuid = "a0c94c4b-ebed-5953-b5fc-82fe598ac79f" -version = "1.0.0" - -[[FillArrays]] -deps = ["LinearAlgebra", "Random", "SparseArrays"] -git-tree-sha1 = "31939159aeb8ffad1d4d8ee44d07f8558273120a" -uuid = "1a297f60-69ca-5386-bcde-b61e274b549b" -version = "0.11.7" - -[[GZip]] -deps = ["Libdl"] -git-tree-sha1 = "039be665faf0b8ae36e089cd694233f5dee3f7d6" -uuid = "92fee26a-97fe-5a0c-ad85-20a5f3185b63" -version = "0.5.1" - -[[Highlights]] -deps = ["DocStringExtensions", "InteractiveUtils", "REPL"] -git-tree-sha1 = "f823a2d04fb233d52812c8024a6d46d9581904a4" -uuid = "eafb193a-b7ab-5a9e-9068-77385905fa72" -version = "0.4.5" - -[[IndexableBitVectors]] -deps = ["Random", "Test"] -git-tree-sha1 = 
"b7f5e42dc867b8a8654a5f899064632dac05bc82" -uuid = "1cb3b9ac-1ffd-5777-9e6b-a3d42300664d" -version = "1.0.0" - -[[Inflate]] -git-tree-sha1 = "f5fc07d4e706b84f72d54eedcc1c13d92fb0871c" -uuid = "d25df0c9-e2be-5dd7-82c8-3ad0b3e990b9" -version = "0.1.2" - -[[InteractiveUtils]] -deps = ["Markdown"] -uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" - -[[JLLWrappers]] -deps = ["Preferences"] -git-tree-sha1 = "642a199af8b68253517b80bd3bfd17eb4e84df6e" -uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210" -version = "1.3.0" - -[[JSON]] -deps = ["Dates", "Mmap", "Parsers", "Unicode"] -git-tree-sha1 = "81690084b6198a2e1da36fcfda16eeca9f9f24e4" -uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" -version = "0.21.1" - -[[JuliaInterpreter]] -deps = ["CodeTracking", "InteractiveUtils", "Random", "UUIDs"] -git-tree-sha1 = "d77a7167d45a8e339e96916e0e9d181c5f68277d" -uuid = "aa1ae85d-cabe-5617-a682-6adf51b2e16a" -version = "0.8.14" - -[[LibCURL]] -deps = ["LibCURL_jll", "MozillaCACerts_jll"] -uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21" - -[[LibCURL_jll]] -deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"] -uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0" - -[[LibGit2]] -deps = ["Base64", "NetworkOptions", "Printf", "SHA"] -uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" - -[[LibSSH2_jll]] -deps = ["Artifacts", "Libdl", "MbedTLS_jll"] -uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8" - -[[Libdl]] -uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" - -[[LightGraphs]] -deps = ["ArnoldiMethod", "DataStructures", "Distributed", "Inflate", "LinearAlgebra", "Random", "SharedArrays", "SimpleTraits", "SparseArrays", "Statistics"] -git-tree-sha1 = "432428df5f360964040ed60418dd5601ecd240b6" -uuid = "093fc24a-ae57-5d10-9952-331d41423f4d" -version = "1.3.5" - -[[LinearAlgebra]] -deps = ["Libdl"] -uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" - -[[LogExpFunctions]] -deps = ["DocStringExtensions", "LinearAlgebra"] -git-tree-sha1 = "ed26854d7c2c867d143f0e07c198fc9e8b721d10" -uuid = 
"2ab3a3ac-af41-5b50-aa03-7779005ae688" -version = "0.2.3" - -[[Logging]] -uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" - -[[MacroTools]] -deps = ["Markdown", "Random"] -git-tree-sha1 = "6a8a2a625ab0dea913aba95c11370589e0239ff0" -uuid = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" -version = "0.5.6" - -[[Markdown]] -deps = ["Base64"] -uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" - -[[MbedTLS_jll]] -deps = ["Artifacts", "Libdl"] -uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" - -[[Missings]] -deps = ["DataAPI"] -git-tree-sha1 = "4ea90bd5d3985ae1f9a908bd4500ae88921c5ce7" -uuid = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28" -version = "1.0.0" - -[[Mmap]] -uuid = "a63ad114-7e13-5084-954f-fe012c677804" - -[[MozillaCACerts_jll]] -uuid = "14a3606d-f60d-562e-9121-12d972cd8159" - -[[NetworkOptions]] -uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908" - -[[OpenSpecFun_jll]] -deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "Pkg"] -git-tree-sha1 = "b9b8b8ed236998f91143938a760c2112dceeb2b4" -uuid = "efe28fd5-8261-553b-a9e1-b2916fc3738e" -version = "0.5.4+0" - -[[OrderedCollections]] -git-tree-sha1 = "4fa2ba51070ec13fcc7517db714445b4ab986bdf" -uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" -version = "1.4.0" - -[[PDMats]] -deps = ["LinearAlgebra", "SparseArrays", "SuiteSparse"] -git-tree-sha1 = "f82a0e71f222199de8e9eb9a09977bd0767d52a0" -uuid = "90014a1f-27ba-587c-ab20-58faa44d9150" -version = "0.11.0" - -[[Parsers]] -deps = ["Dates"] -git-tree-sha1 = "c8abc88faa3f7a3950832ac5d6e690881590d6dc" -uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0" -version = "1.1.0" - -[[Pkg]] -deps = ["Artifacts", "Dates", "Downloads", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"] -uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" - -[[Preferences]] -deps = ["TOML"] -git-tree-sha1 = "ea79e4c9077208cd3bc5d29631a26bc0cff78902" -uuid = "21216c6a-2e73-6563-6e65-726566657250" -version = "1.2.1" - -[[Printf]] -deps = 
["Unicode"] -uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" - -[[QuadGK]] -deps = ["DataStructures", "LinearAlgebra"] -git-tree-sha1 = "12fbe86da16df6679be7521dfb39fbc861e1dc7b" -uuid = "1fd47b50-473d-5c70-9696-f719f8f3bcdc" -version = "2.4.1" - -[[REPL]] -deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"] -uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" - -[[Random]] -deps = ["Serialization"] -uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" - -[[Rmath]] -deps = ["Random", "Rmath_jll"] -git-tree-sha1 = "bf3188feca147ce108c76ad82c2792c57abe7b1f" -uuid = "79098fc4-a85e-5d69-aa6a-4863f24498fa" -version = "0.7.0" - -[[Rmath_jll]] -deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] -git-tree-sha1 = "68db32dff12bb6127bac73c209881191bf0efbb7" -uuid = "f50d1b31-88e8-58de-be2c-1cc44531875f" -version = "0.3.0+0" - -[[SHA]] -uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" - -[[SIMD]] -git-tree-sha1 = "2318299b4c8e8fe06f6f9114fb4404bd1461ae48" -uuid = "fdea26ae-647d-5447-a871-4b548cad5224" -version = "3.3.0" - -[[ScanByte]] -deps = ["Libdl", "SIMD"] -git-tree-sha1 = "9cc2955f2a254b18be655a4ee70bc4031b2b189e" -uuid = "7b38b023-a4d7-4c5e-8d43-3f3097f304eb" -version = "0.3.0" - -[[Serialization]] -uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" - -[[SharedArrays]] -deps = ["Distributed", "Mmap", "Random", "Serialization"] -uuid = "1a1011a3-84de-559e-8e89-a11a2f7dc383" - -[[SimpleTraits]] -deps = ["InteractiveUtils", "MacroTools"] -git-tree-sha1 = "daf7aec3fe3acb2131388f93a4c409b8c7f62226" -uuid = "699a6c99-e7fa-54fc-8d76-47d257e15c1d" -version = "0.9.3" - -[[Sockets]] -uuid = "6462fe0b-24de-5631-8697-dd941f90decc" - -[[SortingAlgorithms]] -deps = ["DataStructures"] -git-tree-sha1 = "2ec1962eba973f383239da22e75218565c390a96" -uuid = "a2af1166-a08f-5f64-846c-94a0d3cef48c" -version = "1.0.0" - -[[SparseArrays]] -deps = ["LinearAlgebra", "Random"] -uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" - -[[SpecialFunctions]] -deps = ["ChainRulesCore", "OpenSpecFun_jll"] -git-tree-sha1 = 
"5919936c0e92cff40e57d0ddf0ceb667d42e5902" -uuid = "276daf66-3868-5448-9aa4-cd146d93841b" -version = "1.3.0" - -[[StableRNGs]] -deps = ["Random", "Test"] -git-tree-sha1 = "b57c4216b6c163a3a9d674f6b9f7b99cdccdb959" -uuid = "860ef19b-820b-49d6-a774-d7a799459cd3" -version = "0.1.2" - -[[StaticArrays]] -deps = ["LinearAlgebra", "Random", "Statistics"] -git-tree-sha1 = "da4cf579416c81994afd6322365d00916c79b8ae" -uuid = "90137ffa-7385-5640-81b9-e52037218182" -version = "0.12.5" - -[[Statistics]] -deps = ["LinearAlgebra", "SparseArrays"] -uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" - -[[StatsAPI]] -git-tree-sha1 = "1958272568dc176a1d881acb797beb909c785510" -uuid = "82ae8749-77ed-4fe6-ae5f-f523153014b0" -version = "1.0.0" - -[[StatsBase]] -deps = ["DataAPI", "DataStructures", "LinearAlgebra", "Missings", "Printf", "Random", "SortingAlgorithms", "SparseArrays", "Statistics", "StatsAPI"] -git-tree-sha1 = "2f6792d523d7448bbe2fec99eca9218f06cc746d" -uuid = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" -version = "0.33.8" - -[[StatsFuns]] -deps = ["LogExpFunctions", "Rmath", "SpecialFunctions"] -git-tree-sha1 = "30cd8c360c54081f806b1ee14d2eecbef3c04c49" -uuid = "4c63d2b9-4356-54db-8cca-17b64c39e42c" -version = "0.9.8" - -[[SubstitutionModels]] -deps = ["BioSymbols", "LinearAlgebra", "StaticArrays"] -git-tree-sha1 = "c96d8aa5d536d553ca5575bfadfd6950c7164ab2" -uuid = "8365b1bb-bd83-58ee-a267-f2965fc81c73" -version = "0.4.2" - -[[SuiteSparse]] -deps = ["Libdl", "LinearAlgebra", "Serialization", "SparseArrays"] -uuid = "4607b0f0-06f3-5cda-b6b1-a6196a1729e9" - -[[TOML]] -deps = ["Dates"] -uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76" - -[[Tar]] -deps = ["ArgTools", "SHA"] -uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e" - -[[Test]] -deps = ["InteractiveUtils", "Logging", "Random", "Serialization"] -uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" - -[[TranscodingStreams]] -deps = ["Random", "Test"] -git-tree-sha1 = "7c53c35547de1c5b9d46a4797cf6d8253807108c" -uuid = 
"3bb67fe8-82b1-5028-8e26-92a6c54297fa" -version = "0.9.5" - -[[TreeTools]] -deps = ["BioSequences", "Dates", "Debugger", "Distributions", "FASTX", "FastaIO", "JSON"] -git-tree-sha1 = "f113a2cd9630f9078c2b76da4b66bb564c716e07" -repo-rev = "master" -repo-url = "../TreeTools" -uuid = "62f0eae3-8c0e-4032-a621-7756092209e5" -version = "0.1.0" - -[[Twiddle]] -git-tree-sha1 = "29509c4862bfb5da9e76eb6937125ab93986270a" -uuid = "7200193e-83a8-5a55-b20d-5d36d44a0795" -version = "1.1.2" - -[[UUIDs]] -deps = ["Random", "SHA"] -uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" - -[[Unicode]] -uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" - -[[Zlib_jll]] -deps = ["Libdl"] -uuid = "83775a58-1f1d-513f-b197-d71354ab007a" - -[[nghttp2_jll]] -deps = ["Artifacts", "Libdl"] -uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d" - -[[p7zip_jll]] -deps = ["Artifacts", "Libdl"] -uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0" From 9f6ec219b6d44f74fa57083e54ab190cf4c2d964 Mon Sep 17 00:00:00 2001 From: anna-parker Date: Wed, 28 Sep 2022 15:36:52 +0200 Subject: [PATCH 4/5] change default file naming convention --- src/Evolve/main.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Evolve/main.jl b/src/Evolve/main.jl index efa008c..456ef06 100644 --- a/src/Evolve/main.jl +++ b/src/Evolve/main.jl @@ -86,6 +86,7 @@ function onehot(c::DNA) end return s end + """ write_seq2fasta(t::Tree, fasta_name::String, output_dir::String, seqkey=:seq; only_terminals=false, remove_0_mutations=true) @@ -94,7 +95,7 @@ end """ function write_seq2fasta(t::Tree, fasta_name::String, output_dir::String, seqkey=:seq; only_terminals=false, remove_0_mutations=true) mkpath(output_dir) - open(FASTA.Writer, output_dir * "sequences" *fasta_name* ".fasta") do w + open(FASTA.Writer, output_dir *"/"*fasta_name* ".fasta") do w if only_terminals iter = POTleaves(t) else From 53dbbb14879e20017d7a676af912f2470dfa9706 Mon Sep 17 00:00:00 2001 From: anna-parker Date: Tue, 15 Nov 2022 11:09:44 +0100 Subject: [PATCH 5/5] 
add date to fasta files --- src/Evolve/Evolve.jl | 1 + src/Evolve/main.jl | 12 ++++++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/Evolve/Evolve.jl b/src/Evolve/Evolve.jl index 6f62a96..2afdbd3 100644 --- a/src/Evolve/Evolve.jl +++ b/src/Evolve/Evolve.jl @@ -5,6 +5,7 @@ using StatsBase using SubstitutionModels using TreeTools using FASTX +using Dates export evolve! export compute_mutations! diff --git a/src/Evolve/main.jl b/src/Evolve/main.jl index 456ef06..57bd224 100644 --- a/src/Evolve/main.jl +++ b/src/Evolve/main.jl @@ -93,7 +93,8 @@ end write evolved sequences to a fasta file with name `fasta_name`, `only_terminals` specifies if only terminal sequences should be written to the file and `remove_0_mutations` if only sequences with mutations on their branches should be written to the file. """ -function write_seq2fasta(t::Tree, fasta_name::String, output_dir::String, seqkey=:seq; only_terminals=false, remove_0_mutations=true) +function write_seq2fasta(t::Tree, fasta_name::String, output_dir::String, seqkey=:seq; + only_terminals=false, remove_0_mutations=true, write_date=false, year_rate=4.73e-3) mkpath(output_dir) open(FASTA.Writer, output_dir *"/"*fasta_name* ".fasta") do w if only_terminals @@ -106,7 +107,14 @@ function write_seq2fasta(t::Tree, fasta_name::String, output_dir::String, seqkey continue else x = node.data.dat[seqkey] - rec = FASTA.Record(node.label, x) + if write_date + start_date = Date(2000,1,1) + dist = TreeTools.distance(node, t.root)/year_rate + date = start_date+Dates.Year(round(dist)) + Dates.Day(round((dist-round(dist))*365)) + rec = FASTA.Record(node.label*"|"*string(date), x) + else + rec = FASTA.Record(node.label, x) + end write(w, rec) end end