From c96e729858fac763439f7134fa9231139e4da154 Mon Sep 17 00:00:00 2001 From: Arni Gunnarsson Date: Thu, 2 Apr 2020 09:59:10 +0100 Subject: [PATCH] Add 'isGC' column to IBD output table --- SRC/Argon.java | 56 +++++++++++++++++++++++++++------------------ SRC/Block.java | 16 +++++++++++-- SRC/IBD.java | 16 ++++++++++--- SRC/Individual.java | 13 +++++++---- 4 files changed, 70 insertions(+), 31 deletions(-) diff --git a/SRC/Argon.java b/SRC/Argon.java index b223b0c..ad6b587 100644 --- a/SRC/Argon.java +++ b/SRC/Argon.java @@ -433,6 +433,7 @@ public void simulate() { // System.out.println(); // } int GCend = -1; + boolean inGC = false; while (ind != null) { if (doGeneConversion) { //TODO: should avoid this and try to have unique piece of code for both gene conversion and no-gene conversion // if (verbose) { @@ -442,6 +443,7 @@ public void simulate() { nextRec = 0; if (ind.segments.get(0).start > GCend) { // the previous tract was not gene conversion, or the gene conversion tract has already ended by the start of the current block + inGC = false; int distToNextRec = (int) Math.round(Tools.sampleExponential(nextRecOrGCRate, generator)); nextRec = ind.segments.get(0).start + Tools.roundToBlock(distToNextRec, minBlockSize); // if (verbose) { @@ -450,21 +452,45 @@ public void simulate() { } else { // the previous tract was gene conversion, and the gene conversion tract has not ended by the start of the current block nextRec = GCend; + inGC = true; // if (verbose) { // System.out.println("In GC tract. Will terminate at previously sampled position:\t" + nextRec); // } } int parentTill = nextRec; + if (ind.segments.get(ind.segments.size() - 1).end >= parentTill && parentTill >= ind.segments.get(0).start) { // if (verbose) { // System.out.println("Will recombine at " + parentTill); // } // recombination will happen, handle current individual and update current with remaining part, then iterate - ArrayList otherGuySegments = ind.splitAfterSomeBlocks(parentTill, ind.ID, gen); + ArrayList otherGuySegments = ind.splitAfterSomeBlocks(parentTill, ind.ID, gen, inGC); nextIndividualChunk = new Individual(ind.ID, otherGuySegments); nextIndividualChunk.ID = ind.ID; } else { nextIndividualChunk = null; + } + + // if GCend == nextRec, I have just terminated a GC tract, will sample next GC/rec event. Else, may sample a GC + if (GCend != nextRec && parentTill < genomeSizeGenetic && generator.nextDouble() < probGeneConversionGivenRec) { //generator.nextDouble() < probGeneConversionGivenRec) { + // Enter gene conversion tract, will be resolved in next iteration + GCend = Tools.roundToBlock(sampleGeneConversionTract(parentTill, debug), minBlockSize);; +// if (verbose) { +// System.out.println("Gene conversion tract from " + parentTill + " to " + GCend); +// } + } else { + // With GCend at -1, we ensure a recombination event is sampled in next iteration + GCend = -1; +// System.out.println("Recombination from " + parentTill); +// if (verbose) { +// if (GCend != nextRec) { +// if (verbose) { +// System.out.println("No gene conversion tract, standard recombination will happen (because of sampling or genome size)."); +// } +// } else { +// System.out.println("No gene conversion tract, standard recombination will happen (because a GC just happened)."); +// } +// } } //handle current individual here @@ -522,25 +548,7 @@ public void simulate() { ind = nextIndividualChunk; parentQueue.add(parent); sampledPopQueue.add(sampledPopID); - // if GCend == nextRec, I have just terminated a GC tract, will sample next GC/rec event. Else, may sample a GC - if (GCend != nextRec && parentTill < genomeSizeGenetic && generator.nextDouble() < probGeneConversionGivenRec) { - // enter gene conversion tract - GCend = Tools.roundToBlock(sampleGeneConversionTract(parentTill, debug), minBlockSize);; -// if (verbose) { -// System.out.println("Gene conversion tract from " + parentTill + " to " + GCend); -// } - } else { - GCend = -1; -// if (verbose) { -// if (GCend != nextRec) { -// if (verbose) { -// System.out.println("No gene conversion tract, standard recombination will happen (because of sampling or genome size)."); -// } -// } else { -// System.out.println("No gene conversion tract, standard recombination will happen (because a GC just happened)."); -// } -// } - } + } else { if (debug) { System.out.println("Pop: " + currPop + " Ind: " + ind.ID + "; gen " + gen); @@ -559,7 +567,8 @@ public void simulate() { System.out.println("Will recombine at " + parentTill); } // recombination will happen, handle current individual and update current with remaining part, then iterate - ArrayList otherGuySegments = ind.splitAfterSomeBlocks(parentTill, ind.ID, gen); + // if gene conversion is off, inGC will always be set to false + ArrayList otherGuySegments = ind.splitAfterSomeBlocks(parentTill, ind.ID, gen, inGC); nextIndividualChunk = new Individual(ind.ID, otherGuySegments); nextIndividualChunk.ID = ind.ID; } else { @@ -791,6 +800,7 @@ public void visit() throws Exception { int IBD_to = 0; int IBD_fromGen = 0; int IBD_toGen = 0; + boolean IBD_isGC = false; ArrayList IBD_sets = null; if (printMUT || printIBD || writeAFS || printAFS) { //TODO: efficient visit of ARG @@ -928,6 +938,7 @@ public void visit() throws Exception { IBD_to = toPhys; IBD_fromGen = doneBlock.start; IBD_toGen = doneBlock.end; + IBD_isGC = doneBlock.isGC; } for (Block c : doneBlock.children) { DescendantsList dL = currentBlockMap.get(c); @@ -952,7 +963,8 @@ public void visit() throws Exception { int IBD_gen = doneBlock.gen; long IBD_ID = doneBlock.ID; try { - StringBuilder IBDstring = IBDFactory.addIBDset(IBD_from, IBD_to, IBD_fromGen, IBD_toGen, IBD_gen, IBD_ID, IBD_sets); + + StringBuilder IBDstring = IBDFactory.addIBDset(IBD_from, IBD_to, IBD_fromGen, IBD_toGen, IBD_gen, IBD_ID, IBD_sets, IBD_isGC); if (IBDstring.length() != 0) { if (writeOutFiles) { IBDwriter.write(IBDstring.toString()); diff --git a/SRC/Block.java b/SRC/Block.java index 738a453..824e42e 100644 --- a/SRC/Block.java +++ b/SRC/Block.java @@ -12,6 +12,7 @@ public class Block implements Comparable { public int start; public int end; + public boolean isGC = false; int numberOfDescendants; public boolean isCoalescent = false; public Block representedChildren = this; @@ -29,7 +30,6 @@ public class Block implements Comparable { // protected void finalize() { //remove // killed++; //remove // } //remove - Block(long ID, int gen, int start, int end, int descendants) { // count++; //remove // this.ID = count; //remove @@ -39,6 +39,17 @@ public class Block implements Comparable { this.end = end; this.numberOfDescendants = descendants; } + + Block(long ID, int gen, int start, int end, int descendants, boolean isGC) { +// count++; //remove +// this.ID = count; //remove + this.ID = ID; + this.gen = gen; + this.start = start; + this.end = end; + this.numberOfDescendants = descendants; + this.isGC = isGC; + } public boolean addParent(Block parent) { parents.add(parent); @@ -56,7 +67,8 @@ public void removeParent(Block parent) { public Block overlap(Block b) { // System.out.println(this.start + "-" + this.end + " " + b.start + "-" + b.end + " " + !((this.start > b.end + 1) || (b.start > this.end + 1))); if (!((this.start > b.end) || (b.start > this.end))) { - return new Block(0, 0, Math.max(this.start, b.start), Math.min(b.end, this.end), 0); + // If either one of overlapping blocks is a GC tract, the overlap is also labelled a GC block + return new Block(0, 0, Math.max(this.start, b.start), Math.min(b.end, this.end), 0, b.isGC || this.isGC); // coalesceRange.start = Math.max(this.start, b.start); // coalesceRange.end = Math.min(b.end, this.end); } else { diff --git a/SRC/IBD.java b/SRC/IBD.java index 138ef4d..54048ab 100644 --- a/SRC/IBD.java +++ b/SRC/IBD.java @@ -15,6 +15,7 @@ public class IBD { String[][] ancestors; Integer[][] ancStart; TreeMap map = new TreeMap(); + TreeMap GC_map = new TreeMap(); public IBD(float threshold, int samples) { this.threshold = threshold; @@ -29,12 +30,16 @@ public IBD(float threshold, int samples) { } } - public StringBuilder addIBDset(int from, int to, int fromGen, int toGen, int gen, long ID, ArrayList sets) throws Exception { + public StringBuilder addIBDset(int from, int to, int fromGen, int toGen, int gen, long ID, ArrayList sets, boolean isGC) throws Exception { StringBuilder IBDstring = new StringBuilder(); + // Adds from and to to the map map.put(fromGen, from); map.put(toGen, to); + GC_map.put(fromGen, isGC ? 1 : 0); + // Get current set list DescendantsList currentSetList = sets.get(0); HashSet currentSet = currentSetList.getHashSetClone(); + // Iterate over descendants for (int i = 1; i < sets.size(); i++) { DescendantsList nextSetList = sets.get(i); HashSet nextSet = nextSetList.getHashSetClone(); @@ -48,17 +53,21 @@ public StringBuilder addIBDset(int from, int to, int fromGen, int toGen, int gen id1 = k - 1; id2 = j - 1; } + // Fetch ancestors and segment starts String anc = ancestors[id1][id2]; Integer start = ancStart[id1][id2]; if (anc.compareTo("") == 0) { + // If they don't exist, save them ancestors[id1][id2] = gen + "\t" + ID; ancStart[id1][id2] = fromGen; } else if (anc.compareTo(gen + "\t" + ID) != 0) { + // If they do but they're not the same ones, update int len = fromGen - start; ancestors[id1][id2] = gen + "\t" + ID; ancStart[id1][id2] = fromGen; if (len / 1000000.0f >= threshold) { - IBDstring.append(new StringBuilder((id1 + 1) + "\t" + (id2 + 1) + "\t" + map.get(start) + "\t" + map.get(fromGen - 1) + "\t" + start + "\t" + (fromGen - 1) + "\t" + len / 1000000.0 + "\t" + anc + "\n")); + // If the segment is long enough build string + IBDstring.append(new StringBuilder((id1 + 1) + "\t" + (id2 + 1) + "\t" + map.get(start) + "\t" + map.get(fromGen - 1) + "\t" + start + "\t" + (fromGen - 1) + "\t" + len / 1000000.0 + "\t" + anc + "\t" + GC_map.get(start) + "\n")); } } } @@ -74,9 +83,10 @@ public StringBuilder finalize(int toGen) { for (int j = i + 1; j < samples; j++) { String anc = ancestors[i][j]; Integer start = ancStart[i][j]; + int isGC = GC_map.containsKey(start) ? GC_map.get(start) : 0; int len = toGen - start + 1; if (len / 1000000.0f >= threshold) { - IBDstring.append(new StringBuilder((i + 1) + "\t" + (j + 1) + "\t" + map.get(start) + "\t" + map.get(toGen) + "\t" + start + "\t" + toGen + "\t" + len / 1000000.0 + "\t" + anc + "\n")); + IBDstring.append(new StringBuilder((i + 1) + "\t" + (j + 1) + "\t" + map.get(start) + "\t" + map.get(toGen) + "\t" + start + "\t" + toGen + "\t" + len / 1000000.0 + "\t" + anc + "\t" + isGC + "\n")); } } } diff --git a/SRC/Individual.java b/SRC/Individual.java index 296c84d..a291e68 100644 --- a/SRC/Individual.java +++ b/SRC/Individual.java @@ -60,10 +60,11 @@ static void print(ArrayList segments) { System.out.println(); } - public ArrayList splitAfterSomeBlocks(int targetBlock, long ID, int gen) { + public ArrayList splitAfterSomeBlocks(int targetBlock, long ID, int gen, boolean inGC) { // if (verboseRecomb) { // System.out.println("************ START RECOMB at gen " + gen + " targetPos: " + targetBlock); -// System.out.print("start thid ind "); +// System.out.println("GC is " + inGC); +// System.out.print("start this ind "); // print(segments); // } ArrayList newMe = new ArrayList(); @@ -74,7 +75,11 @@ public ArrayList splitAfterSomeBlocks(int targetBlock, long ID, int gen) break; // from here on it's next ind } else if (segments.get(pos).start < targetBlock && segments.get(pos).end >= targetBlock) { Block recombinant = segments.get(pos); - Block chunkInNewThisInd = new Block(ID, gen, recombinant.start, targetBlock - 1, recombinant.numberOfDescendants); + // When we find a block containing the target, we split it up by + // start - target-1 + // target - end + // If inGC is true, we are terminating a GC block, so the lefmost fragment is labeled GC + Block chunkInNewThisInd = new Block(ID, gen, recombinant.start, targetBlock - 1, recombinant.numberOfDescendants, inGC); Block chunkInOtherInd = new Block(ID, gen, targetBlock, recombinant.end, recombinant.numberOfDescendants); newMe.add(chunkInNewThisInd); otherInd.add(chunkInOtherInd); @@ -175,7 +180,7 @@ public ArrayList mergeArrayLists(ArrayList otherBlocks, int gen, l // } destination.remove(destination.size() - 1); if (testedBlockInDestination.start < coalesceRange.start) { - Block headBlock = new Block(ID, gen, testedBlockInDestination.start, coalesceRange.start - 1, testedBlockInDestination.numberOfDescendants); + Block headBlock = new Block(ID, gen, testedBlockInDestination.start, coalesceRange.start - 1, testedBlockInDestination.numberOfDescendants);//, testedBlockInDestination.isGC); headBlock.representedChildren = testedBlockInDestination.representedChildren; created.add(headBlock); // add this block to the list of created blocks // if (verboseCoalesce) {