From 455f04b1783b9773b19fa7bc0d8386c64221037c Mon Sep 17 00:00:00 2001 From: Tyler Peryea Date: Tue, 12 Feb 2019 18:29:10 -0500 Subject: [PATCH 01/25] fix with some tests for issue #4, #3, #7, needs evaluation --- src/main/java/lychi/LyChIStandardizer.java | 124 +++++++++- src/test/java/lychi/LychiRegressionTest.java | 224 ++++++++++++++++++- 2 files changed, 341 insertions(+), 7 deletions(-) diff --git a/src/main/java/lychi/LyChIStandardizer.java b/src/main/java/lychi/LyChIStandardizer.java index ebfd5f6..1cfd5be 100644 --- a/src/main/java/lychi/LyChIStandardizer.java +++ b/src/main/java/lychi/LyChIStandardizer.java @@ -1166,7 +1166,129 @@ else if (chiral != 0) { int i = m.indexOf(me.getKey()); m.setChirality(i, me.getValue()); } - + try{ + + Map nonChiralStereo = new LinkedHashMap<>(); + + for(int k=0;k rings = new HashSet(); + + int[][] sssr=m.getSSSR(); + for(MolAtom ma:nonChiralStereo.keySet()){ + //need to find all atoms in the ring + int im=m.indexOf(ma); + for(int[] ir:sssr){ + for(int i=0;i ratoms=Arrays.stream(rr) + .mapToObj(i->m.getAtom(i)) + .collect(Collectors.toSet()); + + MolBond[] bonds=ratoms.stream() + .filter(a->!chirality.containsKey(a)) + .flatMap(a->IntStream.range(0, a.getEdgeCount()).mapToObj(i->a.getEdge(i))) + .filter(e->!ratoms.contains(e.getNode1()) || !ratoms.contains(e.getNode2())) + .map(b->(MolBond)b) + .filter(b->b.getType()==1) + .peek(b->{ + if(ratoms.contains(b.getAtom1()))b.swap(); + }) + .toArray(i->new MolBond[i]); + + BitSet bs = new BitSet(bonds.length*2); + for(int i=0;i allPossible = new HashSet(); + Set currentPossible = new HashSet(); + + for(int i=0;i>j&1)==1){ + onOff.set(j*2); + bonds[j].setFlags(MolBond.UP, MolBond.STEREO1_MASK); + }else{ + onOff.set(j*2+1); + bonds[j].setFlags(MolBond.DOWN, MolBond.STEREO1_MASK); + } + } + Molecule mclone=m.cloneMolecule(); + //(new LyChIStandardizer()).standardize(mclone); + String hash1=LyChIStandardizer.hashKey(mclone); + allPossible.add(hash1); + onOff.or(bs); + + if(onOff.cardinality() == bs.cardinality()){ + currentPossible.add(hash1); + } + } + if(allPossible.size()==currentPossible.size()){ + for(int j=0;j me : chirality.entrySet()) { diff --git a/src/test/java/lychi/LychiRegressionTest.java b/src/test/java/lychi/LychiRegressionTest.java index 2882596..85b8b48 100644 --- a/src/test/java/lychi/LychiRegressionTest.java +++ b/src/test/java/lychi/LychiRegressionTest.java @@ -1,6 +1,7 @@ package lychi; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.*; import java.util.ArrayList; import java.util.Collections; @@ -26,6 +27,7 @@ public static class LychiTestInstance{ String name; String input; String expectedLychi; + boolean shouldMatch=true; public static LychiTestInstance of(String smi, String lychi){ @@ -36,6 +38,27 @@ public static LychiTestInstance of(String smi, String lychi){ return ltest; } + public static LychiTestInstance equivalent(String smi1, String smi2){ + try{ + MolHandler mh = new MolHandler(); + mh.setMolecule(smi2); + Molecule m= mh.getMolecule(); + LyChIStandardizer std = new LyChIStandardizer(); + std.standardize(m); + String fullKey=LyChIStandardizer.hashKey(m); + return of(smi1,fullKey); + }catch(Exception e){ + throw new RuntimeException(e); + } + } + public static LychiTestInstance notEquivalent(String smi1, String smi2){ + return equivalent(smi1,smi2).negate(); + } + public LychiTestInstance negate(){ + this.shouldMatch=!this.shouldMatch; + return this; + } + public LychiTestInstance name(String n){ this.name=n; return this; @@ -54,6 +77,8 @@ public Molecule getMolecule() throws MolFormatException{ } } + + private LychiTestInstance spec; @@ -61,11 +86,15 @@ public LychiRegressionTest(String ignored, LychiTestInstance spec){ this.spec = spec; } - public static void basicTest(Molecule m, String expected) throws Exception{ + public static void basicTest(Molecule m, String expected, boolean match) throws Exception{ LyChIStandardizer std = new LyChIStandardizer(); std.standardize(m); String fullKey=LyChIStandardizer.hashKey(m); - assertEquals(expected,fullKey); + if(match){ + assertEquals(expected,fullKey); + }else{ + assertNotEquals(expected,fullKey); + } } public static Molecule shuffleMolecule(Molecule m, int[] map){ @@ -97,7 +126,7 @@ public static Molecule shuffleMolecule(Molecule m, int[] map){ @Test public void correctLychiFirstTime() throws Exception{ - basicTest(spec.getMolecule(),spec.expectedLychi); + basicTest(spec.getMolecule(),spec.expectedLychi, spec.shouldMatch); } @Test @@ -119,11 +148,11 @@ public void correctLychiAfterRandomShuffle() throws Exception{ int[] map =iatoms.stream().mapToInt(i1->i1).toArray(); Molecule s=shuffleMolecule(m,map); - basicTest(s,spec.expectedLychi); + basicTest(s,spec.expectedLychi,spec.shouldMatch); } } - @Test + //@Test public void daisyChainLychiAfter10Times() throws Exception{ Molecule m=spec.getMolecule(); m.clean(2, null); @@ -133,7 +162,7 @@ public void daisyChainLychiAfter10Times() throws Exception{ Collections.shuffle(iatoms); int[] map =iatoms.stream().mapToInt(i1->i1).toArray(); Molecule s=shuffleMolecule(m,map); - basicTest(s,spec.expectedLychi); + basicTest(s,spec.expectedLychi,spec.shouldMatch); m=s; } } @@ -147,6 +176,189 @@ public static List data(){ tests.add(LychiTestInstance.of("O=C(O[C@H]1C[C@H]2C[C@H]3C[C@@H](C1)N2CC3=O)C4=CNC5=C4C=CC=C5","38C4U16JU-UC5KDUPMVH-UHFJLJL661C-UHCRHDK74DXU").name("cage-like structure")); tests.add(LychiTestInstance.of("C[C@@H]1CC[C@@H](C)CC1","T75RBW5S8-8D9T563A7Y-8YC8NQXD9W5-8Y5MFVTVS3J3").name("trans across ring")); tests.add(LychiTestInstance.of("C[C@H]1CC[C@@H](C)CC1","T75RBW5S8-8D9T563A7Y-8YC8NQXD9W5-8Y5JH5RWXRLR").name("cis across ring")); + tests.add(LychiTestInstance.of("[H][C@@]12[C@@H]3SC[C@]4(NCCC5=C4C=C(OC)C(O)=C5)C(=O)OC[C@H](N1[C@@H](O)[C@@H]6CC7=C([C@H]2N6C)C(O)=C(OC)C(C)=C7)C8=C9OCOC9=C(C)C(OC(C)=O)=C38", "DCLRH149F-FFMPLZ16VC-FC35942KGAU-FCUDSDS2V1NT").name("round trip problem")); + + + tests.add(LychiTestInstance.equivalent("\n" + + " Ketcher 12201304332D 1 1.00000 0.00000 0\n" + + "\n" + + " 59 67 0 1 0 999 V2000\n" + + " -2.2321 -1.8660 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -1.7321 -1.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -2.5981 -0.5000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -2.5981 0.5000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -3.4641 1.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -3.4641 2.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -4.3301 2.5000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -2.5981 2.5000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -2.5981 3.5000 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -3.4641 4.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -1.7321 2.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -0.8660 2.5000 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -1.7321 1.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -0.8660 0.5000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -0.4740 1.2647 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -1.7321 0.0000 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -0.9071 -0.4750 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 0.0000 1.0000 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 0.0000 -1.0000 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -0.8660 -1.5000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -0.8660 -2.5000 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 0.8660 -1.5000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 0.8561 -2.3746 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 2.4488 -3.1947 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 4.5544 -3.0234 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 5.3132 -1.2000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 6.5741 -1.3179 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 4.8632 0.2250 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 4.0294 0.9234 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 2.9488 1.1197 0.0000 S 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 0.8660 0.5000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 0.8811 1.3246 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 1.7321 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 2.5981 0.5000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 2.4244 1.4848 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 1.6097 2.0768 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 0.7419 1.9858 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 1.7927 2.9165 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 3.4641 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 3.9301 0.3000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 3.4641 -1.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 4.2072 -1.6691 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 3.8005 -2.5827 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 2.8060 -2.4781 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 2.5981 -1.5000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 1.7321 -1.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 5.6506 -0.2222 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 6.4172 0.1894 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 6.4966 1.1232 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 5.8342 1.6954 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 6.0136 2.6792 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 5.2512 3.3264 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 5.4306 4.3102 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 4.3096 2.9897 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 3.5473 3.6370 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 3.7266 4.6207 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 4.1303 2.0060 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 4.8676 1.3838 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 2 1 1 1 0 0\n" + + " 2 3 1 0 0 0\n" + + " 3 4 1 0 0 0\n" + + " 4 5 1 0 0 0\n" + + " 5 6 2 0 0 0\n" + + " 6 7 1 0 0 0\n" + + " 6 8 1 0 0 0\n" + + " 8 9 1 0 0 0\n" + + " 9 10 1 0 0 0\n" + + " 8 11 2 0 0 0\n" + + " 11 12 1 0 0 0\n" + + " 11 13 1 0 0 0\n" + + " 4 13 2 0 0 0\n" + + " 13 14 1 0 0 0\n" + + " 14 15 1 1 0 0\n" + + " 14 16 1 0 0 0\n" + + " 2 16 1 0 0 0\n" + + " 16 17 1 0 0 0\n" + + " 14 18 1 0 0 0\n" + + " 18 19 1 1 0 0\n" + + " 18 20 1 0 0 0\n" + + " 20 21 1 0 0 0\n" + + " 2 21 1 0 0 0\n" + + " 21 22 1 1 0 0\n" + + " 20 23 1 0 0 0\n" + + " 23 24 1 1 0 0\n" + + " 23 25 1 0 0 0\n" + + " 25 26 1 0 0 0\n" + + " 26 27 1 0 0 0\n" + + " 27 28 2 0 0 0\n" + + " 29 27 1 0 0 0\n" + + " 29 30 1 6 0 0\n" + + " 30 31 1 0 0 0\n" + + " 31 32 1 0 0 0\n" + + " 18 32 1 0 0 0\n" + + " 32 33 1 1 0 0\n" + + " 32 34 1 0 0 0\n" + + " 34 35 1 0 0 0\n" + + " 35 36 1 0 0 0\n" + + " 36 37 1 0 0 0\n" + + " 37 38 1 0 0 0\n" + + " 37 39 2 0 0 0\n" + + " 35 40 2 0 0 0\n" + + " 40 41 1 0 0 0\n" + + " 40 42 1 0 0 0\n" + + " 42 43 1 0 0 0\n" + + " 43 44 1 0 0 0\n" + + " 44 45 1 0 0 0\n" + + " 45 46 1 0 0 0\n" + + " 42 46 2 0 0 0\n" + + " 46 47 1 0 0 0\n" + + " 23 47 1 0 0 0\n" + + " 34 47 2 0 0 0\n" + + " 29 48 1 0 0 0\n" + + " 48 49 1 0 0 0\n" + + " 49 50 1 0 0 0\n" + + " 50 51 1 0 0 0\n" + + " 51 52 1 0 0 0\n" + + " 52 53 2 0 0 0\n" + + " 53 54 1 0 0 0\n" + + " 53 55 1 0 0 0\n" + + " 55 56 1 0 0 0\n" + + " 56 57 1 0 0 0\n" + + " 55 58 2 0 0 0\n" + + " 58 59 1 0 0 0\n" + + " 29 59 1 0 0 0\n" + + " 51 59 2 0 0 0\n" + + "M END", "[H][C@@]12CC3=C(C(O)=C(OC)C(C)=C3)[C@@]([H])(N1C)[C@@]4([H])N([C@H]2O)[C@@]5([H])COC(=O)[C@]8(CS[C@]4([H])C6=C5C7=C(OCO7)C(C)=C6OC(C)=O)NCCC9=C8C=C(OC)C(O)=C9").name("strereo parity issue 1")); + //C(C)1CCC(C)CC1 + tests.add(LychiTestInstance.equivalent("[C@H](C)1CCC(C)CC1","C(C)1CCC(C)CC1").name("meaningless streo on a ring shouldn't be honored")); + tests.add(LychiTestInstance.equivalent("[C@H](C)1CCC(C)CC1","[C@@H](C)1CCC(C)CC1")); + + tests.add(LychiTestInstance.equivalent("C[C@H]1CC[C@@H](C)CC1","C[C@@H]1CC[C@H](C)CC1").name("opposite form of cis/trans on ring should be the same")); + + tests.add(LychiTestInstance.notEquivalent("C[C@H]1CC[C@@H](C)CC1","C[C@H]1CC[C@H](C)CC1").name("cis across ring is different from trans across ring")); + + + tests.add(LychiTestInstance.equivalent("C[C@H]1C[C@@H](C)CC(C)C1","C[C@@H]1C[C@H](C)CC(C)C1").name("symmetric half-defined stereo should be the same")); + + //O[C@H]1CC(O)CC(O)C1 + tests.add(LychiTestInstance.equivalent("O[C@H]1CC(O)CC(O)C1","O[C@@H]1CC(O)CC(O)C1").name("3-center, 1 specified meaningless center should be same as inverted")); + + tests.add(LychiTestInstance.equivalent("C[C@H]1OC(C)O[C@@H](C)O1","CC1OC(C)OC(C)O1").name("meaningless stereo with 2 dashed bonds on ring shouldn't be honored")); + + //OC1C(O)C(O)C(O)[C@@H](O)[C@H]1O + tests.add(LychiTestInstance.equivalent("OC1C(O)C(O)C(O)[C@@H](O)[C@H]1O","OC1C(O)C(O)C(O)[C@H](O)[C@@H]1O").name("semi-meaningful symmetric stereo honored")); + tests.add(LychiTestInstance.notEquivalent("OC1C(O)C(O)C(O)[C@@H](O)[C@H]1O","OC1C(O)C(O)C(O)[C@@H](O)[C@@H]1O").name("distinct semi-meaningful symmetric stereo honored")); + + tests.add(LychiTestInstance.equivalent("OC1[C@H](O)[C@H](O)C1O","OC1[C@@H](O)[C@@H](O)C1O").name("4-center, 2 specified symmetric meaningful stereo should be same as inverted")); + tests.add(LychiTestInstance.notEquivalent("OC1[C@H](O)[C@H](O)C1O","OC1[C@@H](O)[C@H](O)C1O").name("4-center, 2 specified symmetric meaningful stereo should not be same as 1 center modified")); + + + //OC1[C@H](O)[C@H](O)C1O + + //C[C@H]1OC(C)O[C@@H](C)O1 + //[#6][C@H]1C[C@@H]([#6])CC([#6])C1.[#6][C@H]2CC([#6])C[C@@H]([#6])C2 + tests.add(LychiTestInstance.equivalent("\n" + + " MJ150420 \n" + + "\n" + + " 8 8 0 0 0 0 0 0 0 0999 V2000\n" + + " -2.2656 0.8138 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -2.9801 0.4013 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -2.9801 -0.4237 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -2.2656 -0.8361 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -1.5511 -0.4237 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -1.5511 0.4013 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -0.8366 0.8138 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -0.8366 -0.0111 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 1 2 1 0 0 0 0\n" + + " 1 6 1 0 0 0 0\n" + + " 2 3 1 0 0 0 0\n" + + " 3 4 1 0 0 0 0\n" + + " 4 5 1 0 0 0 0\n" + + " 5 6 1 0 0 0 0\n" + + " 6 7 1 1 0 0 0\n" + + " 6 8 1 6 0 0 0\n" + + "M END","C1CCCCC1").name("meaningless stereo 1")); return tests.stream().map(ls->ls.asJunitInput()).collect(Collectors.toList()); From 71b8ebf3228ff1253454a418e6ec531abcbd88b5 Mon Sep 17 00:00:00 2001 From: Tyler Peryea Date: Thu, 11 Apr 2019 13:48:49 -0400 Subject: [PATCH 02/25] allow symmetrical isotopic structure to be consistent at layer 3 --- src/main/java/lychi/LyChIStandardizer.java | 15 +++++++- src/test/java/lychi/LychiRegressionTest.java | 38 ++++++++++++++++---- 2 files changed, 46 insertions(+), 7 deletions(-) diff --git a/src/main/java/lychi/LyChIStandardizer.java b/src/main/java/lychi/LyChIStandardizer.java index 1cfd5be..5380bd3 100644 --- a/src/main/java/lychi/LyChIStandardizer.java +++ b/src/main/java/lychi/LyChIStandardizer.java @@ -2865,6 +2865,8 @@ public static String[] hashKeyArray (Molecule input) { if (a.getAtno() == 1) m0.removeNode(a); } + + Molecule m1 = m0.cloneMolecule(); int[] atno = new int[m0.getAtomCount()]; @@ -2891,9 +2893,18 @@ public static String[] hashKeyArray (Molecule input) { int[] rank = new int[atno.length]; m0.getGrinv(rank); + for (int i = 0; i < atno.length; ++i) { - rank[i] *= atno[i]; // update rank to resolve symmetry + rank[i] *= atno[i]*5; // update rank to resolve symmetry + rank[i] -= m1.getAtom(i).getImplicitHcount(); // break symmetry when it's based on bond order + } + + + for (AtomIterator ai =new AtomIterator (m1, rank); ai.hasNext(); ) { + MolAtom a = ai.next(); } + + for (AtomIterator ai = new AtomIterator (m0, rank); ai.hasNext(); ai.next()) { @@ -2906,7 +2917,9 @@ public static String[] hashKeyArray (Molecule input) { sb = new StringBuilder (); for (AtomIterator ai =new AtomIterator (m1, rank); ai.hasNext(); ) { MolAtom a = ai.next(); + sb.append(a.getSymbol()+a.getImplicitHcount()); + } // level1: skeleton with atom label String level2 = sb.toString(); diff --git a/src/test/java/lychi/LychiRegressionTest.java b/src/test/java/lychi/LychiRegressionTest.java index 85b8b48..05520d9 100644 --- a/src/test/java/lychi/LychiRegressionTest.java +++ b/src/test/java/lychi/LychiRegressionTest.java @@ -29,6 +29,9 @@ public static class LychiTestInstance{ String expectedLychi; boolean shouldMatch=true; + int layer = 4; + + public static LychiTestInstance of(String smi, String lychi){ LychiTestInstance ltest= new LychiTestInstance(); @@ -51,6 +54,16 @@ public static LychiTestInstance equivalent(String smi1, String smi2){ throw new RuntimeException(e); } } + + public LychiTestInstance layer(int layer){ + this.layer=layer; + return this; + } + + public static LychiTestInstance equivalentLayer3(String smi1, String smi2){ + return equivalent(smi1,smi2).layer(3); + } + public static LychiTestInstance notEquivalent(String smi1, String smi2){ return equivalent(smi1,smi2).negate(); } @@ -86,14 +99,18 @@ public LychiRegressionTest(String ignored, LychiTestInstance spec){ this.spec = spec; } - public static void basicTest(Molecule m, String expected, boolean match) throws Exception{ + public static void basicTest(Molecule m, String expected, boolean match, int layerMatch) throws Exception{ LyChIStandardizer std = new LyChIStandardizer(); std.standardize(m); String fullKey=LyChIStandardizer.hashKey(m); + + String layer = fullKey.split("-")[layerMatch-1]; + String expectedLayer = expected.split("-")[layerMatch-1]; + if(match){ - assertEquals(expected,fullKey); + assertEquals(expectedLayer,layer); }else{ - assertNotEquals(expected,fullKey); + assertNotEquals(expectedLayer,layer); } } @@ -126,7 +143,7 @@ public static Molecule shuffleMolecule(Molecule m, int[] map){ @Test public void correctLychiFirstTime() throws Exception{ - basicTest(spec.getMolecule(),spec.expectedLychi, spec.shouldMatch); + basicTest(spec.getMolecule(),spec.expectedLychi, spec.shouldMatch, spec.layer); } @Test @@ -148,7 +165,7 @@ public void correctLychiAfterRandomShuffle() throws Exception{ int[] map =iatoms.stream().mapToInt(i1->i1).toArray(); Molecule s=shuffleMolecule(m,map); - basicTest(s,spec.expectedLychi,spec.shouldMatch); + basicTest(s,spec.expectedLychi,spec.shouldMatch, spec.layer); } } @@ -162,7 +179,7 @@ public void daisyChainLychiAfter10Times() throws Exception{ Collections.shuffle(iatoms); int[] map =iatoms.stream().mapToInt(i1->i1).toArray(); Molecule s=shuffleMolecule(m,map); - basicTest(s,spec.expectedLychi,spec.shouldMatch); + basicTest(s,spec.expectedLychi,spec.shouldMatch, spec.layer); m=s; } } @@ -361,6 +378,15 @@ public static List data(){ "M END","C1CCCCC1").name("meaningless stereo 1")); + tests.add(LychiTestInstance.equivalentLayer3("[H][C@@](O)(CO)[C@@]([H])(O)[C@]([H])(O)[C@@]([H])(O)C=O", "[H][C@](O)(C=O)[C@@]([H])(O)[C@]([H])(O)[C@]([H])(O)C([2H])([2H])O") + .name("Hydrogen Isotope Same Layer 3") + ); + + + tests.add(LychiTestInstance.of("[H][C@](C)(CC)[C@]([H])(NC(=O)[C@]([H])(CCC(O)=O)N=C(O)[C@]([H])(CCC(O)=O)N=C(O)COCCOCCNC(=O)C1=CC2=C(C=C1)C3(OC2=O)C4=C(OC5=C3C=CC(O)=C5)C=C(O)C=C4)C(=O)N[C@@]([H])(CCCC)C(O)=N[C@@]([H])(CCCN=C(N)N)C(O)=N[C@@]([H])(CCCN=C(N)N)C(O)=NCCCOCC(COCCCN=C(O)[C@]([H])(CCCN=C(N)N)N=C(O)[C@]([H])(CCCN=C(N)N)N=C(O)[C@]([H])(CCCC)NC(=O)[C@@]([H])(NC(=O)[C@]([H])(CCC(O)=O)N=C(O)[C@]([H])(CCC(O)=O)N=C(O)COCCOCCNC(=O)C6=CC7=C(C=C6)C8(OC7=O)C9=C(OC%10=C8C=CC(O)=C%10)C=C(O)C=C9)[C@@]([H])(C)CC)(COCCCN=C(O)[C@]([H])(CCCN=C(N)N)N=C(O)[C@]([H])(CCCN=C(N)N)N=C(O)[C@]([H])(CCCC)NC(=O)[C@@]([H])(NC(=O)[C@]([H])(CCC(O)=O)N=C(O)[C@]([H])(CCC(O)=O)N=C(O)COCCOCCNC(=O)C%11=CC%12=C(C=C%11)C%13(OC%12=O)C%14=C(OC%15=C%13C=CC(O)=C%15)C=C(O)C=C%14)[C@@]([H])(C)CC)N=C(N)O","PY2Z7DXNU-UTQVUB5614-U4T1XF2AQV3-U43YSFQF6PCQ").name("big structure")); + + + return tests.stream().map(ls->ls.asJunitInput()).collect(Collectors.toList()); } } From 0f04c86169622dba3d221bbe5b795fcab0a98756 Mon Sep 17 00:00:00 2001 From: Tyler Peryea Date: Thu, 11 Apr 2019 13:49:17 -0400 Subject: [PATCH 03/25] commented out unfinished test --- src/test/java/lychi/LychiRegressionTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/java/lychi/LychiRegressionTest.java b/src/test/java/lychi/LychiRegressionTest.java index 05520d9..786d10c 100644 --- a/src/test/java/lychi/LychiRegressionTest.java +++ b/src/test/java/lychi/LychiRegressionTest.java @@ -383,7 +383,7 @@ public static List data(){ ); - tests.add(LychiTestInstance.of("[H][C@](C)(CC)[C@]([H])(NC(=O)[C@]([H])(CCC(O)=O)N=C(O)[C@]([H])(CCC(O)=O)N=C(O)COCCOCCNC(=O)C1=CC2=C(C=C1)C3(OC2=O)C4=C(OC5=C3C=CC(O)=C5)C=C(O)C=C4)C(=O)N[C@@]([H])(CCCC)C(O)=N[C@@]([H])(CCCN=C(N)N)C(O)=N[C@@]([H])(CCCN=C(N)N)C(O)=NCCCOCC(COCCCN=C(O)[C@]([H])(CCCN=C(N)N)N=C(O)[C@]([H])(CCCN=C(N)N)N=C(O)[C@]([H])(CCCC)NC(=O)[C@@]([H])(NC(=O)[C@]([H])(CCC(O)=O)N=C(O)[C@]([H])(CCC(O)=O)N=C(O)COCCOCCNC(=O)C6=CC7=C(C=C6)C8(OC7=O)C9=C(OC%10=C8C=CC(O)=C%10)C=C(O)C=C9)[C@@]([H])(C)CC)(COCCCN=C(O)[C@]([H])(CCCN=C(N)N)N=C(O)[C@]([H])(CCCN=C(N)N)N=C(O)[C@]([H])(CCCC)NC(=O)[C@@]([H])(NC(=O)[C@]([H])(CCC(O)=O)N=C(O)[C@]([H])(CCC(O)=O)N=C(O)COCCOCCNC(=O)C%11=CC%12=C(C=C%11)C%13(OC%12=O)C%14=C(OC%15=C%13C=CC(O)=C%15)C=C(O)C=C%14)[C@@]([H])(C)CC)N=C(N)O","PY2Z7DXNU-UTQVUB5614-U4T1XF2AQV3-U43YSFQF6PCQ").name("big structure")); + //tests.add(LychiTestInstance.of("[H][C@](C)(CC)[C@]([H])(NC(=O)[C@]([H])(CCC(O)=O)N=C(O)[C@]([H])(CCC(O)=O)N=C(O)COCCOCCNC(=O)C1=CC2=C(C=C1)C3(OC2=O)C4=C(OC5=C3C=CC(O)=C5)C=C(O)C=C4)C(=O)N[C@@]([H])(CCCC)C(O)=N[C@@]([H])(CCCN=C(N)N)C(O)=N[C@@]([H])(CCCN=C(N)N)C(O)=NCCCOCC(COCCCN=C(O)[C@]([H])(CCCN=C(N)N)N=C(O)[C@]([H])(CCCN=C(N)N)N=C(O)[C@]([H])(CCCC)NC(=O)[C@@]([H])(NC(=O)[C@]([H])(CCC(O)=O)N=C(O)[C@]([H])(CCC(O)=O)N=C(O)COCCOCCNC(=O)C6=CC7=C(C=C6)C8(OC7=O)C9=C(OC%10=C8C=CC(O)=C%10)C=C(O)C=C9)[C@@]([H])(C)CC)(COCCCN=C(O)[C@]([H])(CCCN=C(N)N)N=C(O)[C@]([H])(CCCN=C(N)N)N=C(O)[C@]([H])(CCCC)NC(=O)[C@@]([H])(NC(=O)[C@]([H])(CCC(O)=O)N=C(O)[C@]([H])(CCC(O)=O)N=C(O)COCCOCCNC(=O)C%11=CC%12=C(C=C%11)C%13(OC%12=O)C%14=C(OC%15=C%13C=CC(O)=C%15)C=C(O)C=C%14)[C@@]([H])(C)CC)N=C(N)O","PY2Z7DXNU-UTQVUB5614-U4T1XF2AQV3-U43YSFQF6PCQ").name("big structure")); From 7391993aff70d5f3ef662e2740d3a220c39ce8b2 Mon Sep 17 00:00:00 2001 From: Tyler Peryea Date: Thu, 11 Apr 2019 13:57:40 -0400 Subject: [PATCH 04/25] removing extra symmetry features --- pom.xml | 2 +- src/main/java/lychi/LyChIStandardizer.java | 123 ------------------- src/test/java/lychi/LychiRegressionTest.java | 19 ++- 3 files changed, 16 insertions(+), 128 deletions(-) diff --git a/pom.xml b/pom.xml index 7419356..25d4699 100644 --- a/pom.xml +++ b/pom.xml @@ -7,7 +7,7 @@ gov.nih.ncats lychi jar - 0.5.1 + 0.5.2 Lychi diff --git a/src/main/java/lychi/LyChIStandardizer.java b/src/main/java/lychi/LyChIStandardizer.java index 5380bd3..796aff6 100644 --- a/src/main/java/lychi/LyChIStandardizer.java +++ b/src/main/java/lychi/LyChIStandardizer.java @@ -1166,129 +1166,6 @@ else if (chiral != 0) { int i = m.indexOf(me.getKey()); m.setChirality(i, me.getValue()); } - try{ - - Map nonChiralStereo = new LinkedHashMap<>(); - - for(int k=0;k rings = new HashSet(); - - int[][] sssr=m.getSSSR(); - for(MolAtom ma:nonChiralStereo.keySet()){ - //need to find all atoms in the ring - int im=m.indexOf(ma); - for(int[] ir:sssr){ - for(int i=0;i ratoms=Arrays.stream(rr) - .mapToObj(i->m.getAtom(i)) - .collect(Collectors.toSet()); - - MolBond[] bonds=ratoms.stream() - .filter(a->!chirality.containsKey(a)) - .flatMap(a->IntStream.range(0, a.getEdgeCount()).mapToObj(i->a.getEdge(i))) - .filter(e->!ratoms.contains(e.getNode1()) || !ratoms.contains(e.getNode2())) - .map(b->(MolBond)b) - .filter(b->b.getType()==1) - .peek(b->{ - if(ratoms.contains(b.getAtom1()))b.swap(); - }) - .toArray(i->new MolBond[i]); - - BitSet bs = new BitSet(bonds.length*2); - for(int i=0;i allPossible = new HashSet(); - Set currentPossible = new HashSet(); - - for(int i=0;i>j&1)==1){ - onOff.set(j*2); - bonds[j].setFlags(MolBond.UP, MolBond.STEREO1_MASK); - }else{ - onOff.set(j*2+1); - bonds[j].setFlags(MolBond.DOWN, MolBond.STEREO1_MASK); - } - } - Molecule mclone=m.cloneMolecule(); - //(new LyChIStandardizer()).standardize(mclone); - String hash1=LyChIStandardizer.hashKey(mclone); - allPossible.add(hash1); - onOff.or(bs); - - if(onOff.cardinality() == bs.cardinality()){ - currentPossible.add(hash1); - } - } - if(allPossible.size()==currentPossible.size()){ - for(int j=0;j me : chirality.entrySet()) { diff --git a/src/test/java/lychi/LychiRegressionTest.java b/src/test/java/lychi/LychiRegressionTest.java index 786d10c..6a3dad0 100644 --- a/src/test/java/lychi/LychiRegressionTest.java +++ b/src/test/java/lychi/LychiRegressionTest.java @@ -328,20 +328,20 @@ public static List data(){ " 51 59 2 0 0 0\n" + "M END", "[H][C@@]12CC3=C(C(O)=C(OC)C(C)=C3)[C@@]([H])(N1C)[C@@]4([H])N([C@H]2O)[C@@]5([H])COC(=O)[C@]8(CS[C@]4([H])C6=C5C7=C(OCO7)C(C)=C6OC(C)=O)NCCC9=C8C=C(OC)C(O)=C9").name("strereo parity issue 1")); //C(C)1CCC(C)CC1 - tests.add(LychiTestInstance.equivalent("[C@H](C)1CCC(C)CC1","C(C)1CCC(C)CC1").name("meaningless streo on a ring shouldn't be honored")); - tests.add(LychiTestInstance.equivalent("[C@H](C)1CCC(C)CC1","[C@@H](C)1CCC(C)CC1")); + + tests.add(LychiTestInstance.equivalent("[C@H](C)1CCC(C)CC1","[C@@H](C)1CCC(C)CC1").name("meaningless stereo on a ring the same as opposite meaningless stereo on ring")); tests.add(LychiTestInstance.equivalent("C[C@H]1CC[C@@H](C)CC1","C[C@@H]1CC[C@H](C)CC1").name("opposite form of cis/trans on ring should be the same")); tests.add(LychiTestInstance.notEquivalent("C[C@H]1CC[C@@H](C)CC1","C[C@H]1CC[C@H](C)CC1").name("cis across ring is different from trans across ring")); - tests.add(LychiTestInstance.equivalent("C[C@H]1C[C@@H](C)CC(C)C1","C[C@@H]1C[C@H](C)CC(C)C1").name("symmetric half-defined stereo should be the same")); + //O[C@H]1CC(O)CC(O)C1 tests.add(LychiTestInstance.equivalent("O[C@H]1CC(O)CC(O)C1","O[C@@H]1CC(O)CC(O)C1").name("3-center, 1 specified meaningless center should be same as inverted")); - tests.add(LychiTestInstance.equivalent("C[C@H]1OC(C)O[C@@H](C)O1","CC1OC(C)OC(C)O1").name("meaningless stereo with 2 dashed bonds on ring shouldn't be honored")); + //OC1C(O)C(O)C(O)[C@@H](O)[C@H]1O tests.add(LychiTestInstance.equivalent("OC1C(O)C(O)C(O)[C@@H](O)[C@H]1O","OC1C(O)C(O)C(O)[C@H](O)[C@@H]1O").name("semi-meaningful symmetric stereo honored")); @@ -385,7 +385,18 @@ public static List data(){ //tests.add(LychiTestInstance.of("[H][C@](C)(CC)[C@]([H])(NC(=O)[C@]([H])(CCC(O)=O)N=C(O)[C@]([H])(CCC(O)=O)N=C(O)COCCOCCNC(=O)C1=CC2=C(C=C1)C3(OC2=O)C4=C(OC5=C3C=CC(O)=C5)C=C(O)C=C4)C(=O)N[C@@]([H])(CCCC)C(O)=N[C@@]([H])(CCCN=C(N)N)C(O)=N[C@@]([H])(CCCN=C(N)N)C(O)=NCCCOCC(COCCCN=C(O)[C@]([H])(CCCN=C(N)N)N=C(O)[C@]([H])(CCCN=C(N)N)N=C(O)[C@]([H])(CCCC)NC(=O)[C@@]([H])(NC(=O)[C@]([H])(CCC(O)=O)N=C(O)[C@]([H])(CCC(O)=O)N=C(O)COCCOCCNC(=O)C6=CC7=C(C=C6)C8(OC7=O)C9=C(OC%10=C8C=CC(O)=C%10)C=C(O)C=C9)[C@@]([H])(C)CC)(COCCCN=C(O)[C@]([H])(CCCN=C(N)N)N=C(O)[C@]([H])(CCCN=C(N)N)N=C(O)[C@]([H])(CCCC)NC(=O)[C@@]([H])(NC(=O)[C@]([H])(CCC(O)=O)N=C(O)[C@]([H])(CCC(O)=O)N=C(O)COCCOCCNC(=O)C%11=CC%12=C(C=C%11)C%13(OC%12=O)C%14=C(OC%15=C%13C=CC(O)=C%15)C=C(O)C=C%14)[C@@]([H])(C)CC)N=C(N)O","PY2Z7DXNU-UTQVUB5614-U4T1XF2AQV3-U43YSFQF6PCQ").name("big structure")); + //These are tests that don't pass currently, because they deal + //with complex symmetry, should be uncommented later + /* + tests.add(LychiTestInstance.equivalent("C[C@H]1C[C@@H](C)CC(C)C1","C[C@@H]1C[C@H](C)CC(C)C1") + .name("symmetric half-defined stereo should be the same")); + + tests.add(LychiTestInstance.equivalent("[C@H](C)1CCC(C)CC1","C(C)1CCC(C)CC1") + .name("meaningless stereo on a ring is the same as no stereo on a ring")); + tests.add(LychiTestInstance.equivalent("C[C@H]1OC(C)O[C@@H](C)O1","CC1OC(C)OC(C)O1") + .name("meaningless stereo with 2 dashed bonds on ring shouldn't be honored")); + */ return tests.stream().map(ls->ls.asJunitInput()).collect(Collectors.toList()); } From 930162d1bd4204dc837d4af6d18b6b7d69f17c3b Mon Sep 17 00:00:00 2001 From: Tyler Peryea Date: Thu, 11 Apr 2019 13:57:59 -0400 Subject: [PATCH 05/25] reverted version --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 25d4699..7419356 100644 --- a/pom.xml +++ b/pom.xml @@ -7,7 +7,7 @@ gov.nih.ncats lychi jar - 0.5.2 + 0.5.1 Lychi From 3fe79e3090dfceeaa772753d2f727e8cc8190daf Mon Sep 17 00:00:00 2001 From: Tyler Peryea Date: Thu, 11 Apr 2019 15:51:15 -0400 Subject: [PATCH 06/25] added another test case --- src/main/java/lychi/LyChIStandardizer.java | 5 ----- src/test/java/lychi/LychiRegressionTest.java | 4 ++++ 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/main/java/lychi/LyChIStandardizer.java b/src/main/java/lychi/LyChIStandardizer.java index 796aff6..f09e81c 100644 --- a/src/main/java/lychi/LyChIStandardizer.java +++ b/src/main/java/lychi/LyChIStandardizer.java @@ -2777,11 +2777,6 @@ public static String[] hashKeyArray (Molecule input) { } - for (AtomIterator ai =new AtomIterator (m1, rank); ai.hasNext(); ) { - MolAtom a = ai.next(); - } - - for (AtomIterator ai = new AtomIterator (m0, rank); ai.hasNext(); ai.next()) { diff --git a/src/test/java/lychi/LychiRegressionTest.java b/src/test/java/lychi/LychiRegressionTest.java index 6a3dad0..73238dd 100644 --- a/src/test/java/lychi/LychiRegressionTest.java +++ b/src/test/java/lychi/LychiRegressionTest.java @@ -382,6 +382,10 @@ public static List data(){ .name("Hydrogen Isotope Same Layer 3") ); + + tests.add(LychiTestInstance.equivalentLayer3("[H][C@]1(CC(O)=O)CCC2=C1N(CC3=CC=C(Cl)C=C3)C4=C2C=C(F)C=C4S(C)(=O)=O", "CS(=O)(=O)C1=CC(F)=CC2=C1N(CC3=CC=C(Cl)C=C3)C4=C2CCC4CC(O)=O") + .name("Strange graph invariant problem") + ); //tests.add(LychiTestInstance.of("[H][C@](C)(CC)[C@]([H])(NC(=O)[C@]([H])(CCC(O)=O)N=C(O)[C@]([H])(CCC(O)=O)N=C(O)COCCOCCNC(=O)C1=CC2=C(C=C1)C3(OC2=O)C4=C(OC5=C3C=CC(O)=C5)C=C(O)C=C4)C(=O)N[C@@]([H])(CCCC)C(O)=N[C@@]([H])(CCCN=C(N)N)C(O)=N[C@@]([H])(CCCN=C(N)N)C(O)=NCCCOCC(COCCCN=C(O)[C@]([H])(CCCN=C(N)N)N=C(O)[C@]([H])(CCCN=C(N)N)N=C(O)[C@]([H])(CCCC)NC(=O)[C@@]([H])(NC(=O)[C@]([H])(CCC(O)=O)N=C(O)[C@]([H])(CCC(O)=O)N=C(O)COCCOCCNC(=O)C6=CC7=C(C=C6)C8(OC7=O)C9=C(OC%10=C8C=CC(O)=C%10)C=C(O)C=C9)[C@@]([H])(C)CC)(COCCCN=C(O)[C@]([H])(CCCN=C(N)N)N=C(O)[C@]([H])(CCCN=C(N)N)N=C(O)[C@]([H])(CCCC)NC(=O)[C@@]([H])(NC(=O)[C@]([H])(CCC(O)=O)N=C(O)[C@]([H])(CCC(O)=O)N=C(O)COCCOCCNC(=O)C%11=CC%12=C(C=C%11)C%13(OC%12=O)C%14=C(OC%15=C%13C=CC(O)=C%15)C=C(O)C=C%14)[C@@]([H])(C)CC)N=C(N)O","PY2Z7DXNU-UTQVUB5614-U4T1XF2AQV3-U43YSFQF6PCQ").name("big structure")); From 7ec4072b31c173dc6ae795653ac5bd761ed58d0d Mon Sep 17 00:00:00 2001 From: Tyler Peryea Date: Fri, 12 Apr 2019 13:49:21 -0400 Subject: [PATCH 07/25] updated logic for tie-breaking on graph invarient collision --- src/main/java/lychi/LyChIStandardizer.java | 24 ++++++++++++++++++-- src/test/java/lychi/LychiRegressionTest.java | 4 +++- 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/src/main/java/lychi/LyChIStandardizer.java b/src/main/java/lychi/LyChIStandardizer.java index f09e81c..3248cdc 100644 --- a/src/main/java/lychi/LyChIStandardizer.java +++ b/src/main/java/lychi/LyChIStandardizer.java @@ -2771,12 +2771,32 @@ public static String[] hashKeyArray (Molecule input) { int[] rank = new int[atno.length]; m0.getGrinv(rank); + + + for (int i = 0; i < atno.length; ++i) { + rank[i] *= atno[i]*1204; // update rank to resolve symmetry + // this is bad, because it actually makes MORE collisions + + } + for (int i = 0; i < atno.length; ++i) { - rank[i] *= atno[i]*5; // update rank to resolve symmetry - rank[i] -= m1.getAtom(i).getImplicitHcount(); // break symmetry when it's based on bond order + for (int j = i+1; j < atno.length; ++j) { + if(rank[i] == rank[j]){ + if(atno[i]!=atno[j]){ + rank[i]+=atno[i]*5; + rank[j]+=atno[j]*5; + } + } + } + } + + for(int i=0;i< atno.length;++i){ + rank[i] -= m1.getAtom(i).getImplicitHcount(); // break symmetry when it's based on bond order } + + for (AtomIterator ai = new AtomIterator (m0, rank); ai.hasNext(); ai.next()) { diff --git a/src/test/java/lychi/LychiRegressionTest.java b/src/test/java/lychi/LychiRegressionTest.java index 73238dd..361f5a4 100644 --- a/src/test/java/lychi/LychiRegressionTest.java +++ b/src/test/java/lychi/LychiRegressionTest.java @@ -339,7 +339,8 @@ public static List data(){ //O[C@H]1CC(O)CC(O)C1 - tests.add(LychiTestInstance.equivalent("O[C@H]1CC(O)CC(O)C1","O[C@@H]1CC(O)CC(O)C1").name("3-center, 1 specified meaningless center should be same as inverted")); + tests.add(LychiTestInstance.equivalent("O[C@H]1CC(O)CC(O)C1","O[C@@H]1CC(O)CC(O)C1") + .name("3-center, 1 specified meaningless center should be same as inverted")); @@ -387,6 +388,7 @@ public static List data(){ .name("Strange graph invariant problem") ); + //tests.add(LychiTestInstance.of("[H][C@](C)(CC)[C@]([H])(NC(=O)[C@]([H])(CCC(O)=O)N=C(O)[C@]([H])(CCC(O)=O)N=C(O)COCCOCCNC(=O)C1=CC2=C(C=C1)C3(OC2=O)C4=C(OC5=C3C=CC(O)=C5)C=C(O)C=C4)C(=O)N[C@@]([H])(CCCC)C(O)=N[C@@]([H])(CCCN=C(N)N)C(O)=N[C@@]([H])(CCCN=C(N)N)C(O)=NCCCOCC(COCCCN=C(O)[C@]([H])(CCCN=C(N)N)N=C(O)[C@]([H])(CCCN=C(N)N)N=C(O)[C@]([H])(CCCC)NC(=O)[C@@]([H])(NC(=O)[C@]([H])(CCC(O)=O)N=C(O)[C@]([H])(CCC(O)=O)N=C(O)COCCOCCNC(=O)C6=CC7=C(C=C6)C8(OC7=O)C9=C(OC%10=C8C=CC(O)=C%10)C=C(O)C=C9)[C@@]([H])(C)CC)(COCCCN=C(O)[C@]([H])(CCCN=C(N)N)N=C(O)[C@]([H])(CCCN=C(N)N)N=C(O)[C@]([H])(CCCC)NC(=O)[C@@]([H])(NC(=O)[C@]([H])(CCC(O)=O)N=C(O)[C@]([H])(CCC(O)=O)N=C(O)COCCOCCNC(=O)C%11=CC%12=C(C=C%11)C%13(OC%12=O)C%14=C(OC%15=C%13C=CC(O)=C%15)C=C(O)C=C%14)[C@@]([H])(C)CC)N=C(N)O","PY2Z7DXNU-UTQVUB5614-U4T1XF2AQV3-U43YSFQF6PCQ").name("big structure")); //These are tests that don't pass currently, because they deal From c2b300e34ece7961bd05b73a70b437f8787a1666 Mon Sep 17 00:00:00 2001 From: Tyler Peryea Date: Fri, 12 Apr 2019 15:08:05 -0400 Subject: [PATCH 08/25] changed comment --- src/main/java/lychi/LyChIStandardizer.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/main/java/lychi/LyChIStandardizer.java b/src/main/java/lychi/LyChIStandardizer.java index 3248cdc..1735bd9 100644 --- a/src/main/java/lychi/LyChIStandardizer.java +++ b/src/main/java/lychi/LyChIStandardizer.java @@ -2775,8 +2775,7 @@ public static String[] hashKeyArray (Molecule input) { for (int i = 0; i < atno.length; ++i) { rank[i] *= atno[i]*1204; // update rank to resolve symmetry - // this is bad, because it actually makes MORE collisions - + // large number to allow small fiddling for tie breaking } for (int i = 0; i < atno.length; ++i) { From 4297bf80d09bce9f7e2a8f2c45afd89ab9cfe3dc Mon Sep 17 00:00:00 2001 From: Tyler Peryea Date: Fri, 12 Apr 2019 17:03:37 -0400 Subject: [PATCH 09/25] added make bash script --- make.sh | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 make.sh diff --git a/make.sh b/make.sh new file mode 100644 index 0000000..c7bb97e --- /dev/null +++ b/make.sh @@ -0,0 +1,3 @@ +bash mavenMake.sh +mvn clean package + From 9558e1729c1c742a13bfbc115c1b41eef2dd004d Mon Sep 17 00:00:00 2001 From: Tyler Peryea Date: Mon, 15 Apr 2019 14:44:16 -0400 Subject: [PATCH 10/25] changed tie-breaking to be more explicitly consistent with previous mechanism --- pom.xml | 2 +- src/main/java/lychi/LyChIStandardizer.java | 59 +++++++++++++------- src/test/java/lychi/LychiRegressionTest.java | 31 +++++++++- 3 files changed, 70 insertions(+), 22 deletions(-) diff --git a/pom.xml b/pom.xml index 7419356..762b0c7 100644 --- a/pom.xml +++ b/pom.xml @@ -7,7 +7,7 @@ gov.nih.ncats lychi jar - 0.5.1 + 0.5.1ISOTOPE_FIX Lychi diff --git a/src/main/java/lychi/LyChIStandardizer.java b/src/main/java/lychi/LyChIStandardizer.java index 1735bd9..11cae9d 100644 --- a/src/main/java/lychi/LyChIStandardizer.java +++ b/src/main/java/lychi/LyChIStandardizer.java @@ -2708,6 +2708,31 @@ public static String hashKey (Molecule mol, String sep) { return keys[0]+sep+keys[1]+sep+keys[2]+sep+keys[3]; } + + private static Molecule getLayer3Equivalent(Molecule m){ + Molecule m0=m.cloneMolecule(); + + int[] atno = new int[m0.getAtomCount()]; + for (int i = 0; i < atno.length; ++i) { + MolAtom a = m0.getAtom(i); + a.setRadical(0); + a.setCharge(0); + a.setFlags(0); + a.setMassno(0); + a.setAtomMap(i+1); + } + for (MolBond b : m0.getBondArray()) { + b.setStereo2Flags(b.getNode1(), b.getNode2(), 0); + if(b.isQuery()){ //hack + b.setFlags(1); + } + } + Molecule mout = new Molecule(); + ChemUtil.canonicalSMILES(mout,m0,false); + + return mout; + } + /** * Extended version of the hash key that includes the topology+label * layer that sits between the first and second layers of previous @@ -2771,30 +2796,27 @@ public static String[] hashKeyArray (Molecule input) { int[] rank = new int[atno.length]; m0.getGrinv(rank); + int[] fallbackLookup = new int[atno.length]; - - for (int i = 0; i < atno.length; ++i) { - rank[i] *= atno[i]*1204; // update rank to resolve symmetry - // large number to allow small fiddling for tie breaking + try{ + //set the tie-breaking priority based on the layer-3 information + Molecule stdLychi3Mol=getLayer3Equivalent(m1); + MolAtom[] matarr1=stdLychi3Mol.getAtomArray(); + + for (int i = 0; i < atno.length; ++i) { + fallbackLookup[matarr1[i].getAtomMap()-1]=i; + } + }catch(Exception e){ + logger.log(Level.SEVERE, + "Can't produce simplified structure from molecule", e); } + for (int i = 0; i < atno.length; ++i) { - for (int j = i+1; j < atno.length; ++j) { - if(rank[i] == rank[j]){ - if(atno[i]!=atno[j]){ - rank[i]+=atno[i]*5; - rank[j]+=atno[j]*5; - } - } - } + rank[i] = (rank[i]*atno[i]*2048); // update rank to resolve symmetry + rank[i] += fallbackLookup[i]; //tie breaking based on lychi-3 fallback order } - for(int i=0;i< atno.length;++i){ - rank[i] -= m1.getAtom(i).getImplicitHcount(); // break symmetry when it's based on bond order - } - - - for (AtomIterator ai = new AtomIterator (m0, rank); @@ -2824,7 +2846,6 @@ public static String[] hashKeyArray (Molecule input) { "2: "+level2 + "\n"+ "3: "+level3 + "\n"); } - return hashChain45 (level0, level1, level2, level3); } diff --git a/src/test/java/lychi/LychiRegressionTest.java b/src/test/java/lychi/LychiRegressionTest.java index 361f5a4..55d8a27 100644 --- a/src/test/java/lychi/LychiRegressionTest.java +++ b/src/test/java/lychi/LychiRegressionTest.java @@ -4,6 +4,7 @@ import static org.junit.Assert.*; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.Random; @@ -104,8 +105,12 @@ public static void basicTest(Molecule m, String expected, boolean match, int lay std.standardize(m); String fullKey=LyChIStandardizer.hashKey(m); - String layer = fullKey.split("-")[layerMatch-1]; - String expectedLayer = expected.split("-")[layerMatch-1]; + String layer = Arrays.stream(fullKey.split("-")) + .limit(layerMatch) + .collect(Collectors.joining("-")); + String expectedLayer = Arrays.stream(expected.split("-")) + .limit(layerMatch) + .collect(Collectors.joining("-")); if(match){ assertEquals(expectedLayer,layer); @@ -135,6 +140,9 @@ public static Molecule shuffleMolecule(Molecule m, int[] map){ int ni2=rmap[oi2]; MolBond nmb=mb.cloneBond(nmas[ni1], nmas[ni2]); m2.add(nmb); + if(nmb.isQuery()){ + nmb.setFlags(mb.getFlags()); + } } return m2; @@ -193,9 +201,25 @@ public static List data(){ tests.add(LychiTestInstance.of("O=C(O[C@H]1C[C@H]2C[C@H]3C[C@@H](C1)N2CC3=O)C4=CNC5=C4C=CC=C5","38C4U16JU-UC5KDUPMVH-UHFJLJL661C-UHCRHDK74DXU").name("cage-like structure")); tests.add(LychiTestInstance.of("C[C@@H]1CC[C@@H](C)CC1","T75RBW5S8-8D9T563A7Y-8YC8NQXD9W5-8Y5MFVTVS3J3").name("trans across ring")); tests.add(LychiTestInstance.of("C[C@H]1CC[C@@H](C)CC1","T75RBW5S8-8D9T563A7Y-8YC8NQXD9W5-8Y5JH5RWXRLR").name("cis across ring")); + + tests.add(LychiTestInstance.of("CN(C)CCOC(C1=CC=CC=C1)C2=CC=CC=C2","SG1MX4TJL-LRQMG7F9KY-LYVJD4DSRGU-LYU23YRCSQTR").name("test lychi change")); + + + tests.add(LychiTestInstance.equivalentLayer3("CC(C)(CO)[C@@H](O)C(=O)NCCC(O)=O","CC(C)(CO)[CH](O)C(=O)NCCC(O)=O").name("layer 3 the same when only stereo changes")); + tests.add(LychiTestInstance.equivalentLayer3("CCCCCCCCCCCCCC.CC(C)(CO)[C@@H](O)C(=O)NCCC(O)=O","CCCCCCCCCCCCCC.CC(C)(CO)[CH](O)C(=O)NCCC(O)=O").name("rare salt should be stripped, regardless of stereo")); + tests.add(LychiTestInstance.of("[H][C@@]12[C@@H]3SC[C@]4(NCCC5=C4C=C(OC)C(O)=C5)C(=O)OC[C@H](N1[C@@H](O)[C@@H]6CC7=C([C@H]2N6C)C(O)=C(OC)C(C)=C7)C8=C9OCOC9=C(C)C(OC(C)=O)=C38", "DCLRH149F-FFMPLZ16VC-FC35942KGAU-FCUDSDS2V1NT").name("round trip problem")); + tests.add(LychiTestInstance.of("[Na+].[Na+].[Na+].[Na+].[O-]P([O-])(=O)OP([O-])([O-])=O", "U42VPKYB8-83HRLLLGLV-8VMGB3AAA1L-8VLXF4WDFH73") + .name("legacy consistency test 1")); + tests.add(LychiTestInstance.of("CC(C)NCC(O)COC1=CC=C(CCOCC2CC2)C=C1", "19W74QJNW-WXMWMLXXWD-WDPBV6R9GFJ-WDJKLYLWS5JW") + .name("legacy consistency test 2")); + tests.add(LychiTestInstance.of("CC1=C(CC(O)=O)C2=C(C=CC(F)=C2)\\C1=C/C3=CC=C(C=C3)S(C)(=O)=O", "4D13QHCQ6-6CKUM1H2QX-6XM1AWY81DJ-6XJV527T8L5X") + .name("legacy consistency test 3")); + + + tests.add(LychiTestInstance.equivalent("\n" + " Ketcher 12201304332D 1 1.00000 0.00000 0\n" + "\n" + @@ -352,6 +376,7 @@ public static List data(){ tests.add(LychiTestInstance.notEquivalent("OC1[C@H](O)[C@H](O)C1O","OC1[C@@H](O)[C@H](O)C1O").name("4-center, 2 specified symmetric meaningful stereo should not be same as 1 center modified")); + //OC1[C@H](O)[C@H](O)C1O //C[C@H]1OC(C)O[C@@H](C)O1 @@ -404,6 +429,8 @@ public static List data(){ .name("meaningless stereo with 2 dashed bonds on ring shouldn't be honored")); */ + + return tests.stream().map(ls->ls.asJunitInput()).collect(Collectors.toList()); } } From 995433131fd5664ed935d43cfe7ebc62e628223a Mon Sep 17 00:00:00 2001 From: Tyler Peryea Date: Tue, 16 Apr 2019 11:55:05 -0400 Subject: [PATCH 11/25] merged with other update --- src/main/java/lychi/LyChIStandardizer.java | 138 ++++++++++++++++++- src/test/java/lychi/LychiRegressionTest.java | 4 +- 2 files changed, 139 insertions(+), 3 deletions(-) diff --git a/src/main/java/lychi/LyChIStandardizer.java b/src/main/java/lychi/LyChIStandardizer.java index 11cae9d..35efafc 100644 --- a/src/main/java/lychi/LyChIStandardizer.java +++ b/src/main/java/lychi/LyChIStandardizer.java @@ -58,10 +58,19 @@ public class LyChIStandardizer { /** * This static version value must be updated if any changes is made - * to this class that would be imcompatible with earlier results!!! + * to this class that would be incompatible with earlier results!!! */ public static final int VERSION = 0x10; + + + /** + * This flag, when true, checks for "deeper" symmetry by enumerating + * unspecified stereo forms and confirming that they + */ + private static final boolean DEEP_SYMMETRY = true; + + static final private boolean DEBUG; static final private boolean UNMEX; // apply UNM extra rules static { @@ -238,6 +247,7 @@ protected SMIRKS[] initialValue () { } }; + static class MolComparator implements Comparator { public int compare (Molecule m1, Molecule m2) { if (m1 == null && m2 == null) return 0; @@ -1149,6 +1159,132 @@ else if (chiral != 0) { } } } + + if(DEEP_SYMMETRY){ + try{ + + Map nonChiralStereo = new LinkedHashMap<>(); + + for(int k=0;k rings = new HashSet(); + + int[][] sssr=m.getSSSR(); + for(MolAtom ma:nonChiralStereo.keySet()){ + //need to find all atoms in the ring + int im=m.indexOf(ma); + for(int[] ir:sssr){ + for(int i=0;i ratoms=Arrays.stream(rr) + .mapToObj(i->m.getAtom(i)) + .collect(Collectors.toSet()); + + MolBond[] bonds=ratoms.stream() + .filter(a->!chirality.containsKey(a)) + .flatMap(a->IntStream.range(0, a.getEdgeCount()).mapToObj(i->a.getEdge(i))) + .filter(e->!ratoms.contains(e.getNode1()) || !ratoms.contains(e.getNode2())) + .map(b->(MolBond)b) + .filter(b->b.getType()==1) + .peek(b->{ + if(ratoms.contains(b.getAtom1()))b.swap(); + }) + .toArray(i->new MolBond[i]); + + BitSet bs = new BitSet(bonds.length*2); + for(int i=0;i allPossible = new HashSet(); + Set currentPossible = new HashSet(); + + for(int i=0;i>j&1)==1){ + onOff.set(j*2); + bonds[j].setFlags(MolBond.UP, MolBond.STEREO1_MASK); + }else{ + onOff.set(j*2+1); + bonds[j].setFlags(MolBond.DOWN, MolBond.STEREO1_MASK); + } + } + Molecule mclone=m.cloneMolecule(); + //(new LyChIStandardizer()).standardize(mclone); + String hash1=LyChIStandardizer.hashKey(mclone); + allPossible.add(hash1); + onOff.or(bs); + + if(onOff.cardinality() == bs.cardinality()){ + currentPossible.add(hash1); + } + } + if(allPossible.size()==currentPossible.size()){ + for(int j=0;j data(){ //These are tests that don't pass currently, because they deal //with complex symmetry, should be uncommented later - /* + tests.add(LychiTestInstance.equivalent("C[C@H]1C[C@@H](C)CC(C)C1","C[C@@H]1C[C@H](C)CC(C)C1") .name("symmetric half-defined stereo should be the same")); @@ -427,7 +427,7 @@ public static List data(){ tests.add(LychiTestInstance.equivalent("C[C@H]1OC(C)O[C@@H](C)O1","CC1OC(C)OC(C)O1") .name("meaningless stereo with 2 dashed bonds on ring shouldn't be honored")); - */ + From b80b4cca05f191b259ee8426f25297a604610902 Mon Sep 17 00:00:00 2001 From: Tyler Peryea Date: Tue, 16 Apr 2019 11:55:42 -0400 Subject: [PATCH 12/25] updated version number --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 762b0c7..dee7ac6 100644 --- a/pom.xml +++ b/pom.xml @@ -7,7 +7,7 @@ gov.nih.ncats lychi jar - 0.5.1ISOTOPE_FIX + 0.5.1ISOTOPE_AND_SYMMETRY_FIX Lychi From 15bd305ae32f051d6f59e3caa668a565142f1902 Mon Sep 17 00:00:00 2001 From: Tyler Peryea Date: Tue, 16 Apr 2019 11:59:07 -0400 Subject: [PATCH 13/25] removed non-symmetry fix components --- pom.xml | 2 +- src/main/java/lychi/LyChIStandardizer.java | 24 ++++++++++---------- src/test/java/lychi/LychiRegressionTest.java | 20 ++++++++-------- 3 files changed, 23 insertions(+), 23 deletions(-) diff --git a/pom.xml b/pom.xml index dee7ac6..13c54c9 100644 --- a/pom.xml +++ b/pom.xml @@ -7,7 +7,7 @@ gov.nih.ncats lychi jar - 0.5.1ISOTOPE_AND_SYMMETRY_FIX + 0.5.1SYMMETRY_FIX Lychi diff --git a/src/main/java/lychi/LyChIStandardizer.java b/src/main/java/lychi/LyChIStandardizer.java index 35efafc..82c584a 100644 --- a/src/main/java/lychi/LyChIStandardizer.java +++ b/src/main/java/lychi/LyChIStandardizer.java @@ -2934,18 +2934,18 @@ public static String[] hashKeyArray (Molecule input) { int[] fallbackLookup = new int[atno.length]; - try{ - //set the tie-breaking priority based on the layer-3 information - Molecule stdLychi3Mol=getLayer3Equivalent(m1); - MolAtom[] matarr1=stdLychi3Mol.getAtomArray(); - - for (int i = 0; i < atno.length; ++i) { - fallbackLookup[matarr1[i].getAtomMap()-1]=i; - } - }catch(Exception e){ - logger.log(Level.SEVERE, - "Can't produce simplified structure from molecule", e); - } +// try{ +// //set the tie-breaking priority based on the layer-3 information +// Molecule stdLychi3Mol=getLayer3Equivalent(m1); +// MolAtom[] matarr1=stdLychi3Mol.getAtomArray(); +// +// for (int i = 0; i < atno.length; ++i) { +// fallbackLookup[matarr1[i].getAtomMap()-1]=i; +// } +// }catch(Exception e){ +// logger.log(Level.SEVERE, +// "Can't produce simplified structure from molecule", e); +// } for (int i = 0; i < atno.length; ++i) { diff --git a/src/test/java/lychi/LychiRegressionTest.java b/src/test/java/lychi/LychiRegressionTest.java index 1b479b0..223d251 100644 --- a/src/test/java/lychi/LychiRegressionTest.java +++ b/src/test/java/lychi/LychiRegressionTest.java @@ -205,8 +205,8 @@ public static List data(){ tests.add(LychiTestInstance.of("CN(C)CCOC(C1=CC=CC=C1)C2=CC=CC=C2","SG1MX4TJL-LRQMG7F9KY-LYVJD4DSRGU-LYU23YRCSQTR").name("test lychi change")); - tests.add(LychiTestInstance.equivalentLayer3("CC(C)(CO)[C@@H](O)C(=O)NCCC(O)=O","CC(C)(CO)[CH](O)C(=O)NCCC(O)=O").name("layer 3 the same when only stereo changes")); - tests.add(LychiTestInstance.equivalentLayer3("CCCCCCCCCCCCCC.CC(C)(CO)[C@@H](O)C(=O)NCCC(O)=O","CCCCCCCCCCCCCC.CC(C)(CO)[CH](O)C(=O)NCCC(O)=O").name("rare salt should be stripped, regardless of stereo")); + //tests.add(LychiTestInstance.equivalentLayer3("CC(C)(CO)[C@@H](O)C(=O)NCCC(O)=O","CC(C)(CO)[CH](O)C(=O)NCCC(O)=O").name("layer 3 the same when only stereo changes")); + //tests.add(LychiTestInstance.equivalentLayer3("CCCCCCCCCCCCCC.CC(C)(CO)[C@@H](O)C(=O)NCCC(O)=O","CCCCCCCCCCCCCC.CC(C)(CO)[CH](O)C(=O)NCCC(O)=O").name("rare salt should be stripped, regardless of stereo")); tests.add(LychiTestInstance.of("[H][C@@]12[C@@H]3SC[C@]4(NCCC5=C4C=C(OC)C(O)=C5)C(=O)OC[C@H](N1[C@@H](O)[C@@H]6CC7=C([C@H]2N6C)C(O)=C(OC)C(C)=C7)C8=C9OCOC9=C(C)C(OC(C)=O)=C38", "DCLRH149F-FFMPLZ16VC-FC35942KGAU-FCUDSDS2V1NT").name("round trip problem")); @@ -404,14 +404,14 @@ public static List data(){ "M END","C1CCCCC1").name("meaningless stereo 1")); - tests.add(LychiTestInstance.equivalentLayer3("[H][C@@](O)(CO)[C@@]([H])(O)[C@]([H])(O)[C@@]([H])(O)C=O", "[H][C@](O)(C=O)[C@@]([H])(O)[C@]([H])(O)[C@]([H])(O)C([2H])([2H])O") - .name("Hydrogen Isotope Same Layer 3") - ); - - - tests.add(LychiTestInstance.equivalentLayer3("[H][C@]1(CC(O)=O)CCC2=C1N(CC3=CC=C(Cl)C=C3)C4=C2C=C(F)C=C4S(C)(=O)=O", "CS(=O)(=O)C1=CC(F)=CC2=C1N(CC3=CC=C(Cl)C=C3)C4=C2CCC4CC(O)=O") - .name("Strange graph invariant problem") - ); +// tests.add(LychiTestInstance.equivalentLayer3("[H][C@@](O)(CO)[C@@]([H])(O)[C@]([H])(O)[C@@]([H])(O)C=O", "[H][C@](O)(C=O)[C@@]([H])(O)[C@]([H])(O)[C@]([H])(O)C([2H])([2H])O") +// .name("Hydrogen Isotope Same Layer 3") +// ); +// +// +// tests.add(LychiTestInstance.equivalentLayer3("[H][C@]1(CC(O)=O)CCC2=C1N(CC3=CC=C(Cl)C=C3)C4=C2C=C(F)C=C4S(C)(=O)=O", "CS(=O)(=O)C1=CC(F)=CC2=C1N(CC3=CC=C(Cl)C=C3)C4=C2CCC4CC(O)=O") +// .name("Strange graph invariant problem") +// ); //tests.add(LychiTestInstance.of("[H][C@](C)(CC)[C@]([H])(NC(=O)[C@]([H])(CCC(O)=O)N=C(O)[C@]([H])(CCC(O)=O)N=C(O)COCCOCCNC(=O)C1=CC2=C(C=C1)C3(OC2=O)C4=C(OC5=C3C=CC(O)=C5)C=C(O)C=C4)C(=O)N[C@@]([H])(CCCC)C(O)=N[C@@]([H])(CCCN=C(N)N)C(O)=N[C@@]([H])(CCCN=C(N)N)C(O)=NCCCOCC(COCCCN=C(O)[C@]([H])(CCCN=C(N)N)N=C(O)[C@]([H])(CCCN=C(N)N)N=C(O)[C@]([H])(CCCC)NC(=O)[C@@]([H])(NC(=O)[C@]([H])(CCC(O)=O)N=C(O)[C@]([H])(CCC(O)=O)N=C(O)COCCOCCNC(=O)C6=CC7=C(C=C6)C8(OC7=O)C9=C(OC%10=C8C=CC(O)=C%10)C=C(O)C=C9)[C@@]([H])(C)CC)(COCCCN=C(O)[C@]([H])(CCCN=C(N)N)N=C(O)[C@]([H])(CCCN=C(N)N)N=C(O)[C@]([H])(CCCC)NC(=O)[C@@]([H])(NC(=O)[C@]([H])(CCC(O)=O)N=C(O)[C@]([H])(CCC(O)=O)N=C(O)COCCOCCNC(=O)C%11=CC%12=C(C=C%11)C%13(OC%12=O)C%14=C(OC%15=C%13C=CC(O)=C%15)C=C(O)C=C%14)[C@@]([H])(C)CC)N=C(N)O","PY2Z7DXNU-UTQVUB5614-U4T1XF2AQV3-U43YSFQF6PCQ").name("big structure")); From 3105bb4e4034d12c6207670508745dbeb06e3b5c Mon Sep 17 00:00:00 2001 From: Tyler Peryea Date: Wed, 17 Apr 2019 14:01:01 -0400 Subject: [PATCH 14/25] added test case --- src/test/java/lychi/LychiRegressionTest.java | 56 ++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/src/test/java/lychi/LychiRegressionTest.java b/src/test/java/lychi/LychiRegressionTest.java index 1b479b0..2c28d8a 100644 --- a/src/test/java/lychi/LychiRegressionTest.java +++ b/src/test/java/lychi/LychiRegressionTest.java @@ -204,6 +204,61 @@ public static List data(){ tests.add(LychiTestInstance.of("CN(C)CCOC(C1=CC=CC=C1)C2=CC=CC=C2","SG1MX4TJL-LRQMG7F9KY-LYVJD4DSRGU-LYU23YRCSQTR").name("test lychi change")); + tests.add(LychiTestInstance.equivalent("CC(C)(C)C1CCC2(CC1)CCN(CCCN3CCOCC3)CC2","NCGC00013953\n" + + " -IDBS- 1129050841\n\n" + + " 24 26 0 0 0 0 0 0 0 0999 V2000\n" + + " 0.2296 -3.5406 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -0.5954 -3.5406 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -1.4204 -3.5406 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -0.5954 -4.3656 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -0.5954 -2.7156 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 0.1191 -2.3031 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 0.1191 -1.4781 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -0.5954 -1.0656 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -1.3099 -1.4781 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -1.3099 -2.3031 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 0.1191 -0.6531 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 0.1191 0.1719 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -0.5954 0.5844 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -0.5954 1.4094 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 0.1191 1.8219 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 0.1191 2.6469 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 0.8335 3.0594 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 1.5480 2.6469 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 2.2625 3.0594 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 2.2625 3.8844 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 1.5480 4.2969 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 0.8335 3.8844 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -1.3099 0.1719 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -1.3099 -0.6531 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 1 2 1 0 0 0\n" + + " 2 3 1 0 0 0\n" + + " 2 4 1 0 0 0\n" + + " 5 2 1 1 0 0\n" + + " 5 6 1 0 0 0\n" + + " 6 7 1 0 0 0\n" + + " 8 7 1 6 0 0\n" + + " 8 9 1 0 0 0\n" + + " 9 10 1 0 0 0\n" + + " 5 10 1 0 0 0\n" + + " 8 11 1 0 0 0\n" + + " 11 12 1 0 0 0\n" + + " 12 13 1 0 0 0\n" + + " 13 14 1 0 0 0\n" + + " 14 15 1 0 0 0\n" + + " 15 16 1 0 0 0\n" + + " 16 17 1 0 0 0\n" + + " 17 18 1 0 0 0\n" + + " 18 19 1 0 0 0\n" + + " 19 20 1 0 0 0\n" + + " 20 21 1 0 0 0\n" + + " 21 22 1 0 0 0\n" + + " 17 22 1 0 0 0\n" + + " 13 23 1 0 0 0\n" + + " 23 24 1 0 0 0\n" + + " 8 24 1 0 0 0\n" + + "M END").name("spiro stereo without meaning should not change lychi")); + tests.add(LychiTestInstance.equivalentLayer3("CC(C)(CO)[C@@H](O)C(=O)NCCC(O)=O","CC(C)(CO)[CH](O)C(=O)NCCC(O)=O").name("layer 3 the same when only stereo changes")); tests.add(LychiTestInstance.equivalentLayer3("CCCCCCCCCCCCCC.CC(C)(CO)[C@@H](O)C(=O)NCCC(O)=O","CCCCCCCCCCCCCC.CC(C)(CO)[CH](O)C(=O)NCCC(O)=O").name("rare salt should be stripped, regardless of stereo")); @@ -431,6 +486,7 @@ public static List data(){ + return tests.stream().map(ls->ls.asJunitInput()).collect(Collectors.toList()); } } From 0f1b3b410a3216fd7673fe5b05a7db5f2e3032b6 Mon Sep 17 00:00:00 2001 From: Tyler Peryea Date: Wed, 1 May 2019 16:00:38 -0400 Subject: [PATCH 15/25] added back in graph invarient tie-breaking fix --- src/main/java/lychi/LyChIStandardizer.java | 24 ++++++++++---------- src/test/java/lychi/LychiRegressionTest.java | 16 ++++++------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/src/main/java/lychi/LyChIStandardizer.java b/src/main/java/lychi/LyChIStandardizer.java index 82c584a..35efafc 100644 --- a/src/main/java/lychi/LyChIStandardizer.java +++ b/src/main/java/lychi/LyChIStandardizer.java @@ -2934,18 +2934,18 @@ public static String[] hashKeyArray (Molecule input) { int[] fallbackLookup = new int[atno.length]; -// try{ -// //set the tie-breaking priority based on the layer-3 information -// Molecule stdLychi3Mol=getLayer3Equivalent(m1); -// MolAtom[] matarr1=stdLychi3Mol.getAtomArray(); -// -// for (int i = 0; i < atno.length; ++i) { -// fallbackLookup[matarr1[i].getAtomMap()-1]=i; -// } -// }catch(Exception e){ -// logger.log(Level.SEVERE, -// "Can't produce simplified structure from molecule", e); -// } + try{ + //set the tie-breaking priority based on the layer-3 information + Molecule stdLychi3Mol=getLayer3Equivalent(m1); + MolAtom[] matarr1=stdLychi3Mol.getAtomArray(); + + for (int i = 0; i < atno.length; ++i) { + fallbackLookup[matarr1[i].getAtomMap()-1]=i; + } + }catch(Exception e){ + logger.log(Level.SEVERE, + "Can't produce simplified structure from molecule", e); + } for (int i = 0; i < atno.length; ++i) { diff --git a/src/test/java/lychi/LychiRegressionTest.java b/src/test/java/lychi/LychiRegressionTest.java index a56a960..8a6ee4b 100644 --- a/src/test/java/lychi/LychiRegressionTest.java +++ b/src/test/java/lychi/LychiRegressionTest.java @@ -459,14 +459,14 @@ public static List data(){ "M END","C1CCCCC1").name("meaningless stereo 1")); -// tests.add(LychiTestInstance.equivalentLayer3("[H][C@@](O)(CO)[C@@]([H])(O)[C@]([H])(O)[C@@]([H])(O)C=O", "[H][C@](O)(C=O)[C@@]([H])(O)[C@]([H])(O)[C@]([H])(O)C([2H])([2H])O") -// .name("Hydrogen Isotope Same Layer 3") -// ); -// -// -// tests.add(LychiTestInstance.equivalentLayer3("[H][C@]1(CC(O)=O)CCC2=C1N(CC3=CC=C(Cl)C=C3)C4=C2C=C(F)C=C4S(C)(=O)=O", "CS(=O)(=O)C1=CC(F)=CC2=C1N(CC3=CC=C(Cl)C=C3)C4=C2CCC4CC(O)=O") -// .name("Strange graph invariant problem") -// ); + tests.add(LychiTestInstance.equivalentLayer3("[H][C@@](O)(CO)[C@@]([H])(O)[C@]([H])(O)[C@@]([H])(O)C=O", "[H][C@](O)(C=O)[C@@]([H])(O)[C@]([H])(O)[C@]([H])(O)C([2H])([2H])O") + .name("Hydrogen Isotope Same Layer 3") + ); + + + tests.add(LychiTestInstance.equivalentLayer3("[H][C@]1(CC(O)=O)CCC2=C1N(CC3=CC=C(Cl)C=C3)C4=C2C=C(F)C=C4S(C)(=O)=O", "CS(=O)(=O)C1=CC(F)=CC2=C1N(CC3=CC=C(Cl)C=C3)C4=C2CCC4CC(O)=O") + .name("Strange graph invariant problem") + ); //tests.add(LychiTestInstance.of("[H][C@](C)(CC)[C@]([H])(NC(=O)[C@]([H])(CCC(O)=O)N=C(O)[C@]([H])(CCC(O)=O)N=C(O)COCCOCCNC(=O)C1=CC2=C(C=C1)C3(OC2=O)C4=C(OC5=C3C=CC(O)=C5)C=C(O)C=C4)C(=O)N[C@@]([H])(CCCC)C(O)=N[C@@]([H])(CCCN=C(N)N)C(O)=N[C@@]([H])(CCCN=C(N)N)C(O)=NCCCOCC(COCCCN=C(O)[C@]([H])(CCCN=C(N)N)N=C(O)[C@]([H])(CCCN=C(N)N)N=C(O)[C@]([H])(CCCC)NC(=O)[C@@]([H])(NC(=O)[C@]([H])(CCC(O)=O)N=C(O)[C@]([H])(CCC(O)=O)N=C(O)COCCOCCNC(=O)C6=CC7=C(C=C6)C8(OC7=O)C9=C(OC%10=C8C=CC(O)=C%10)C=C(O)C=C9)[C@@]([H])(C)CC)(COCCCN=C(O)[C@]([H])(CCCN=C(N)N)N=C(O)[C@]([H])(CCCN=C(N)N)N=C(O)[C@]([H])(CCCC)NC(=O)[C@@]([H])(NC(=O)[C@]([H])(CCC(O)=O)N=C(O)[C@]([H])(CCC(O)=O)N=C(O)COCCOCCNC(=O)C%11=CC%12=C(C=C%11)C%13(OC%12=O)C%14=C(OC%15=C%13C=CC(O)=C%15)C=C(O)C=C%14)[C@@]([H])(C)CC)N=C(N)O","PY2Z7DXNU-UTQVUB5614-U4T1XF2AQV3-U43YSFQF6PCQ").name("big structure")); From 2926b8b99327a8de3fe759eeba187a96d738d1d3 Mon Sep 17 00:00:00 2001 From: Tyler Peryea Date: Wed, 1 May 2019 16:04:38 -0400 Subject: [PATCH 16/25] fixed version --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 13c54c9..25d4699 100644 --- a/pom.xml +++ b/pom.xml @@ -7,7 +7,7 @@ gov.nih.ncats lychi jar - 0.5.1SYMMETRY_FIX + 0.5.2 Lychi From 16a28c277e326b0e30bf4ea342d798b5c80e9722 Mon Sep 17 00:00:00 2001 From: Tyler Peryea Date: Thu, 2 May 2019 10:20:07 -0400 Subject: [PATCH 17/25] fix for graph invarient issue on layer-3 --- src/main/java/lychi/LyChIStandardizer.java | 12 ++++++++++-- src/test/java/lychi/LychiRegressionTest.java | 7 ++++++- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/src/main/java/lychi/LyChIStandardizer.java b/src/main/java/lychi/LyChIStandardizer.java index 35efafc..ebc2421 100644 --- a/src/main/java/lychi/LyChIStandardizer.java +++ b/src/main/java/lychi/LyChIStandardizer.java @@ -2946,12 +2946,19 @@ public static String[] hashKeyArray (Molecule input) { logger.log(Level.SEVERE, "Can't produce simplified structure from molecule", e); } - + + //System.out.println(molstr); for (int i = 0; i < atno.length; ++i) { - rank[i] = (rank[i]*atno[i]*2048); // update rank to resolve symmetry + rank[i] = (rank[i]*2048); // update rank to resolve symmetry + + + rank[i] += fallbackLookup[i]; //tie breaking based on lychi-3 fallback order + + } + // System.out.println(Arrays.toString(rank)); @@ -2972,6 +2979,7 @@ public static String[] hashKeyArray (Molecule input) { } // level1: skeleton with atom label String level2 = sb.toString(); + System.out.println(level2); // level2: full canonical smiles with stereo/isotope/charge... String level3 = molstr; diff --git a/src/test/java/lychi/LychiRegressionTest.java b/src/test/java/lychi/LychiRegressionTest.java index 8a6ee4b..b69bb46 100644 --- a/src/test/java/lychi/LychiRegressionTest.java +++ b/src/test/java/lychi/LychiRegressionTest.java @@ -263,7 +263,8 @@ public static List data(){ //tests.add(LychiTestInstance.equivalentLayer3("CC(C)(CO)[C@@H](O)C(=O)NCCC(O)=O","CC(C)(CO)[CH](O)C(=O)NCCC(O)=O").name("layer 3 the same when only stereo changes")); //tests.add(LychiTestInstance.equivalentLayer3("CCCCCCCCCCCCCC.CC(C)(CO)[C@@H](O)C(=O)NCCC(O)=O","CCCCCCCCCCCCCC.CC(C)(CO)[CH](O)C(=O)NCCC(O)=O").name("rare salt should be stripped, regardless of stereo")); - tests.add(LychiTestInstance.of("[H][C@@]12[C@@H]3SC[C@]4(NCCC5=C4C=C(OC)C(O)=C5)C(=O)OC[C@H](N1[C@@H](O)[C@@H]6CC7=C([C@H]2N6C)C(O)=C(OC)C(C)=C7)C8=C9OCOC9=C(C)C(OC(C)=O)=C38", "DCLRH149F-FFMPLZ16VC-FC35942KGAU-FCUDSDS2V1NT").name("round trip problem")); + tests.add(LychiTestInstance.equivalent("[H][C@@]12[C@@H]3SC[C@]4(NCCC5=C4C=C(OC)C(O)=C5)C(=O)OC[C@H](N1[C@@H](O)[C@@H]6CC7=C([C@H]2N6C)C(O)=C(OC)C(C)=C7)C8=C9OCOC9=C(C)C(OC(C)=O)=C38", "[H][C@@]12[C@@H]3SC[C@]4(NCCC5=C4C=C(OC)C(O)=C5)C(=O)OC[C@H](N1[C@@H](O)[C@@H]6CC7=C([C@H]2N6C)C(O)=C(OC)C(C)=C7)C8=C9OCOC9=C(C)C(OC(C)=O)=C38") + .name("round trip problem")); tests.add(LychiTestInstance.of("[Na+].[Na+].[Na+].[Na+].[O-]P([O-])(=O)OP([O-])([O-])=O", "U42VPKYB8-83HRLLLGLV-8VMGB3AAA1L-8VLXF4WDFH73") @@ -483,7 +484,11 @@ public static List data(){ tests.add(LychiTestInstance.equivalent("C[C@H]1OC(C)O[C@@H](C)O1","CC1OC(C)OC(C)O1") .name("meaningless stereo with 2 dashed bonds on ring shouldn't be honored")); + tests.add(LychiTestInstance.equivalentLayer3("C1CN(CCN1)C2=CC=C(C=C2)C3=CN4N=CC(=C4N=C3)C5=CC=CC6=C5C=CC=N6","C1CN(CCN1)C2=CC=C(C=C2)C3=CN4N=CC(=C4N=C3)C5=CC=NC6=C5C=CC=C6") + .negate() + .name("Constitutional Isomer Not Equivalent on Layer 3")); + //C1CN(CCN1)C2=CC=C(C=C2)C3=CN4N=CC(=C4N=C3)C5=CC=CC6=C5C=CC=N6.C1CN(CCN1)C2=CC=C(C=C2)C3=CN4N=CC(=C4N=C3)C5=CC=NC6=C5C=CC=C6 From 000119ffff8503f047cc415655f0a1af48b23442 Mon Sep 17 00:00:00 2001 From: Tyler Peryea Date: Thu, 2 May 2019 10:20:58 -0400 Subject: [PATCH 18/25] incremented version to 0.6.0 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 25d4699..d2fc527 100644 --- a/pom.xml +++ b/pom.xml @@ -7,7 +7,7 @@ gov.nih.ncats lychi jar - 0.5.2 + 0.6.0 Lychi From a1cd3475c3a220c3d7a276ce82c3ae086c6997b8 Mon Sep 17 00:00:00 2001 From: Tyler Peryea Date: Thu, 2 May 2019 17:56:28 -0400 Subject: [PATCH 19/25] updated graph invariant to be morgan's --- src/main/java/lychi/LyChIStandardizer.java | 85 ++++++++++++++++---- src/test/java/lychi/LychiRegressionTest.java | 28 ++++--- 2 files changed, 87 insertions(+), 26 deletions(-) diff --git a/src/main/java/lychi/LyChIStandardizer.java b/src/main/java/lychi/LyChIStandardizer.java index ebc2421..2e983fe 100644 --- a/src/main/java/lychi/LyChIStandardizer.java +++ b/src/main/java/lychi/LyChIStandardizer.java @@ -2869,6 +2869,36 @@ private static Molecule getLayer3Equivalent(Molecule m){ return mout; } + + /** + * Takes an int array and replaces each value with the position it would have in a sorted + * unique list. + * + * For example: + * [4,2,1,10,42234,1,42234] + * + * would become: + * + * [2,1,0,3,4,0,4] + * + * + * @param rank + */ + public static void normalizeRanks(int[] rank){ + + int[] last=new int[]{-1, -1}; + IntStream.range(0, rank.length) + .mapToObj(i->new int[]{i,rank[i]}) + .sorted((a,b)->b[1]-a[1]) + .forEach(ab->{ + if(last[1]!=ab[1]){ + last[0]++; + last[1]=ab[1]; + } + rank[ab[0]]=last[0]; + }); + } + /** * Extended version of the hash key that includes the topology+label * layer that sits between the first and second layers of previous @@ -2929,11 +2959,8 @@ public static String[] hashKeyArray (Molecule input) { StringBuilder sb = new StringBuilder (); // level1: topology+atom label - int[] rank = new int[atno.length]; - m0.getGrinv(rank); - + int[] fallbackLookup = new int[atno.length]; - try{ //set the tie-breaking priority based on the layer-3 information Molecule stdLychi3Mol=getLayer3Equivalent(m1); @@ -2946,18 +2973,49 @@ public static String[] hashKeyArray (Molecule input) { logger.log(Level.SEVERE, "Can't produce simplified structure from molecule", e); } - - //System.out.println(molstr); +// int[] rank = new int[atno.length]; +// m0.getGrinv(rank); + + + int[] rank; + { + int MAX_ROUND = 13; + int[][] hash = new int[MAX_ROUND][atno.length]; + for (int i = 0; i < atno.length; ++i) + hash[0][i] = 1; + + int round = 1; + for (; round < MAX_ROUND; ++round) { + int p = round - 1; + for (int i = 0; i < atno.length; ++i) { + MolAtom a = m0.getAtom(i); + int ha = hash[p][i]; + for (int j = 0; j < a.getBondCount(); ++j) { + MolAtom xa = a.getBond(j).getOtherAtom(a); + int k = m0.indexOf(xa); + ha += (1 << xa.getBondCount()) + hash[p][k]; + } + if (ha < 0) { + if (DEBUG) { + logger.log(Level.SEVERE, + "OVERFLOW AT ITERATION "+round+"!"); + } + ha = hash[round-1][i]; + } + hash[round][i] = ha; + } + normalizeRanks(hash[round]); + } + rank = hash[round-1]; + } + + for (int i = 0; i < atno.length; ++i) { - rank[i] = (rank[i]*2048); // update rank to resolve symmetry - - - - rank[i] += fallbackLookup[i]; //tie breaking based on lychi-3 fallback order - - + rank[i] = (rank[i]*2048); // update rank to resolve symmetry + rank[i] += fallbackLookup[i]; //tie breaking based on lychi-3 fallback order } + // System.out.println(Arrays.toString(rank)); @@ -2979,7 +3037,6 @@ public static String[] hashKeyArray (Molecule input) { } // level1: skeleton with atom label String level2 = sb.toString(); - System.out.println(level2); // level2: full canonical smiles with stereo/isotope/charge... String level3 = molstr; diff --git a/src/test/java/lychi/LychiRegressionTest.java b/src/test/java/lychi/LychiRegressionTest.java index b69bb46..bfae60e 100644 --- a/src/test/java/lychi/LychiRegressionTest.java +++ b/src/test/java/lychi/LychiRegressionTest.java @@ -197,12 +197,22 @@ public void daisyChainLychiAfter10Times() throws Exception{ public static List data(){ List tests = new ArrayList<>(); - tests.add(LychiTestInstance.of("CCCCCC","U28WVSD82-2YWKXKS36P-2PQPUKLGUWW-2PWK7BQNJSU6").name("simple carbon chain")); - tests.add(LychiTestInstance.of("O=C(O[C@H]1C[C@H]2C[C@H]3C[C@@H](C1)N2CC3=O)C4=CNC5=C4C=CC=C5","38C4U16JU-UC5KDUPMVH-UHFJLJL661C-UHCRHDK74DXU").name("cage-like structure")); - tests.add(LychiTestInstance.of("C[C@@H]1CC[C@@H](C)CC1","T75RBW5S8-8D9T563A7Y-8YC8NQXD9W5-8Y5MFVTVS3J3").name("trans across ring")); - tests.add(LychiTestInstance.of("C[C@H]1CC[C@@H](C)CC1","T75RBW5S8-8D9T563A7Y-8YC8NQXD9W5-8Y5JH5RWXRLR").name("cis across ring")); +// tests.add(LychiTestInstance.of("CCCCCC","U28WVSD82-2YWKXKS36P-2PQPUKLGUWW-2PWK7BQNJSU6").name("simple carbon chain")); +// tests.add(LychiTestInstance.of("O=C(O[C@H]1C[C@H]2C[C@H]3C[C@@H](C1)N2CC3=O)C4=CNC5=C4C=CC=C5","38C4U16JU-UC5KDUPMVH-UHFJLJL661C-UHCRHDK74DXU").name("cage-like structure")); +// tests.add(LychiTestInstance.of("CN(C)CCOC(C1=CC=CC=C1)C2=CC=CC=C2","SG1MX4TJL-LRQMG7F9KY-LYVJD4DSRGU-LYU23YRCSQTR").name("test lychi change")); +// +// tests.add(LychiTestInstance.of("[Na+].[Na+].[Na+].[Na+].[O-]P([O-])(=O)OP([O-])([O-])=O", "U42VPKYB8-83HRLLLGLV-8VMGB3AAA1L-8VLXF4WDFH73") +// .name("legacy consistency test 1")); +// tests.add(LychiTestInstance.of("CC(C)NCC(O)COC1=CC=C(CCOCC2CC2)C=C1", "19W74QJNW-WXMWMLXXWD-WDPBV6R9GFJ-WDJKLYLWS5JW") +// .name("legacy consistency test 2")); +// tests.add(LychiTestInstance.of("CC1=C(CC(O)=O)C2=C(C=CC(F)=C2)\\C1=C/C3=CC=C(C=C3)S(C)(=O)=O", "4D13QHCQ6-6CKUM1H2QX-6XM1AWY81DJ-6XJV527T8L5X") +// .name("legacy consistency test 3")); + + + tests.add(LychiTestInstance.notEquivalent("C[C@@H]1CC[C@@H](C)CC1","C[C@H]1CC[C@@H](C)CC1").name("trans across ring")); + tests.add(LychiTestInstance.equivalentLayer3("C[C@@H]1CC[C@@H](C)CC1","C[C@H]1CC[C@@H](C)CC1").name("cis across ring")); + - tests.add(LychiTestInstance.of("CN(C)CCOC(C1=CC=CC=C1)C2=CC=CC=C2","SG1MX4TJL-LRQMG7F9KY-LYVJD4DSRGU-LYU23YRCSQTR").name("test lychi change")); tests.add(LychiTestInstance.equivalent("CC(C)(C)C1CCC2(CC1)CCN(CCCN3CCOCC3)CC2","NCGC00013953\n" + " -IDBS- 1129050841\n\n" + @@ -266,13 +276,7 @@ public static List data(){ tests.add(LychiTestInstance.equivalent("[H][C@@]12[C@@H]3SC[C@]4(NCCC5=C4C=C(OC)C(O)=C5)C(=O)OC[C@H](N1[C@@H](O)[C@@H]6CC7=C([C@H]2N6C)C(O)=C(OC)C(C)=C7)C8=C9OCOC9=C(C)C(OC(C)=O)=C38", "[H][C@@]12[C@@H]3SC[C@]4(NCCC5=C4C=C(OC)C(O)=C5)C(=O)OC[C@H](N1[C@@H](O)[C@@H]6CC7=C([C@H]2N6C)C(O)=C(OC)C(C)=C7)C8=C9OCOC9=C(C)C(OC(C)=O)=C38") .name("round trip problem")); - - tests.add(LychiTestInstance.of("[Na+].[Na+].[Na+].[Na+].[O-]P([O-])(=O)OP([O-])([O-])=O", "U42VPKYB8-83HRLLLGLV-8VMGB3AAA1L-8VLXF4WDFH73") - .name("legacy consistency test 1")); - tests.add(LychiTestInstance.of("CC(C)NCC(O)COC1=CC=C(CCOCC2CC2)C=C1", "19W74QJNW-WXMWMLXXWD-WDPBV6R9GFJ-WDJKLYLWS5JW") - .name("legacy consistency test 2")); - tests.add(LychiTestInstance.of("CC1=C(CC(O)=O)C2=C(C=CC(F)=C2)\\C1=C/C3=CC=C(C=C3)S(C)(=O)=O", "4D13QHCQ6-6CKUM1H2QX-6XM1AWY81DJ-6XJV527T8L5X") - .name("legacy consistency test 3")); + From 29574342e86566afa338468d6345af8a56f55151 Mon Sep 17 00:00:00 2001 From: Tyler Peryea Date: Thu, 2 May 2019 18:09:48 -0400 Subject: [PATCH 20/25] added tests --- src/test/java/lychi/LychiRegressionTest.java | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/test/java/lychi/LychiRegressionTest.java b/src/test/java/lychi/LychiRegressionTest.java index bfae60e..f73c5fa 100644 --- a/src/test/java/lychi/LychiRegressionTest.java +++ b/src/test/java/lychi/LychiRegressionTest.java @@ -209,6 +209,20 @@ public static List data(){ // .name("legacy consistency test 3")); + //C1CCNCC1.C2CCN=CC2 + + //CC1CCC=NC1.CN2CCCCC2 + + tests.add(LychiTestInstance.equivalent("CC1CCC=NC1","CN2CCCCC2") + .layer(1) + .name("Bond order and label change has same layer 1")); + + + tests.add(LychiTestInstance.equivalent("C1CCNCC1","C2CCN=CC2") + .layer(2) + .name("Bond order change gives same layer 2")); + + tests.add(LychiTestInstance.notEquivalent("C[C@@H]1CC[C@@H](C)CC1","C[C@H]1CC[C@@H](C)CC1").name("trans across ring")); tests.add(LychiTestInstance.equivalentLayer3("C[C@@H]1CC[C@@H](C)CC1","C[C@H]1CC[C@@H](C)CC1").name("cis across ring")); From eb5f0288f139cb0046057cea3c1ef64aacbac699 Mon Sep 17 00:00:00 2001 From: Tyler Peryea Date: Thu, 2 May 2019 18:10:25 -0400 Subject: [PATCH 21/25] basic code that may be used later --- src/main/java/lychi/LyChIStandardizer.java | 59 ++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/src/main/java/lychi/LyChIStandardizer.java b/src/main/java/lychi/LyChIStandardizer.java index 2e983fe..b8f771c 100644 --- a/src/main/java/lychi/LyChIStandardizer.java +++ b/src/main/java/lychi/LyChIStandardizer.java @@ -2869,6 +2869,58 @@ private static Molecule getLayer3Equivalent(Molecule m){ return mout; } + private static Molecule getLayer2Equivalent(Molecule m){ + Molecule m0=m.cloneMolecule(); + + int[] atno = new int[m0.getAtomCount()]; + for (int i = 0; i < atno.length; ++i) { + MolAtom a = m0.getAtom(i); + a.setRadical(0); + a.setCharge(0); + a.setFlags(0); + a.setMassno(0); + a.setAtomMap(i+1); + } + for (MolBond b : m0.getBondArray()) { + b.setStereo2Flags(b.getNode1(), b.getNode2(), 0); + if(b.isQuery()){ //hack + b.setFlags(1); + } + b.setType(1); //force single + } + Molecule mout = new Molecule(); + ChemUtil.canonicalSMILES(mout,m0,false); + + return mout; + } + + private static Molecule getLayer1Equivalent(Molecule m){ + Molecule m0=m.cloneMolecule(); + + int[] atno = new int[m0.getAtomCount()]; + for (int i = 0; i < atno.length; ++i) { + MolAtom a = m0.getAtom(i); + a.setRadical(0); + a.setCharge(0); + a.setFlags(0); + a.setMassno(0); + a.setAtno(6); + a.setAtomMap(i+1); + } + for (MolBond b : m0.getBondArray()) { + b.setStereo2Flags(b.getNode1(), b.getNode2(), 0); + if(b.isQuery()){ //hack + b.setFlags(1); + } + b.setType(1); //force single + } + Molecule mout = new Molecule(); + ChemUtil.canonicalSMILES(mout,m0,false); + + return mout; + } + + /** * Takes an int array and replaces each value with the position it would have in a sorted @@ -2934,6 +2986,13 @@ public static String[] hashKeyArray (Molecule input) { m0.removeNode(a); } +// String molstr1=ChemUtil.canonicalSMILES(getLayer1Equivalent(m0)); +// String molstr2=ChemUtil.canonicalSMILES(getLayer2Equivalent(m0)); +// String molstr3=ChemUtil.canonicalSMILES(getLayer3Equivalent(m0)); +// +// +// return hashChain45 (molstr1, molstr2, molstr3, molstr); + Molecule m1 = m0.cloneMolecule(); From a963b98344570eb229cf7ee4d0c38eb909dc0980 Mon Sep 17 00:00:00 2001 From: Tyler Peryea Date: Thu, 2 May 2019 21:45:46 -0400 Subject: [PATCH 22/25] changing to lazier layer computation using morgan's and simplified molecules --- src/main/java/lychi/LyChIStandardizer.java | 187 +++++++------------ src/test/java/lychi/LychiRegressionTest.java | 7 + 2 files changed, 75 insertions(+), 119 deletions(-) diff --git a/src/main/java/lychi/LyChIStandardizer.java b/src/main/java/lychi/LyChIStandardizer.java index b8f771c..a19b40e 100644 --- a/src/main/java/lychi/LyChIStandardizer.java +++ b/src/main/java/lychi/LyChIStandardizer.java @@ -5,6 +5,7 @@ import java.io.IOException; import java.io.InputStreamReader; import java.io.PrintStream; +import java.math.BigInteger; import java.security.MessageDigest; import java.util.ArrayList; import java.util.Arrays; @@ -2951,6 +2952,69 @@ public static void normalizeRanks(int[] rank){ }); } + /** + * Simple morgan's algorithm for graph invariants. This requires k*N operations + * where k is a constant that is large enough to "absorb" the whole graph (13 here). + * + * @param m + * @return + */ + public static long[] morgans(Molecule m){ + int MAX_ROUND = 13; + int[] atno = new int[m.getAtomCount()]; + for (int i = 0; i < atno.length; ++i) { + MolAtom a = m.getAtom(i); + atno[i]=a.getAtno(); + } + long[] rank; + { + + long[][] hash = new long[MAX_ROUND][atno.length]; + for (int i = 0; i < atno.length; ++i) + hash[0][i] = atno[i]; + + int round = 1; + for (; round < MAX_ROUND; ++round) { + int p = round - 1; + for (int i = 0; i < atno.length; ++i) { + MolAtom a = m.getAtom(i); + long ha = hash[p][i]; + for (int j = 0; j < a.getBondCount(); ++j) { + MolAtom xa = a.getBond(j).getOtherAtom(a); + int k = m.indexOf(xa); + ha += ha += (a.getBond(j).getType() << xa.getImplicitHcount()) + + hash[p][k]; + } + if (ha < 0) { + if (DEBUG) { + logger.log(Level.SEVERE, + "OVERFLOW AT ITERATION "+round+"!"); + } + ha = hash[round-1][i]; + } + hash[round][i] = ha; + } + } + rank = hash[round-1]; + } + return rank; + } + + /** + * Return the morgan's algorithm values as a sorted list, encoded + * as a string. + * @param m + * @return + */ + public static String morgansAsString(Molecule m){ + long[] order = Arrays.stream(morgans(m)) + .sorted() + .toArray(); + + String s=new BigInteger(BitSet.valueOf(order).toByteArray()).toString(64); + return s; + } + /** * Extended version of the hash key that includes the topology+label * layer that sits between the first and second layers of previous @@ -2986,127 +3050,12 @@ public static String[] hashKeyArray (Molecule input) { m0.removeNode(a); } -// String molstr1=ChemUtil.canonicalSMILES(getLayer1Equivalent(m0)); -// String molstr2=ChemUtil.canonicalSMILES(getLayer2Equivalent(m0)); -// String molstr3=ChemUtil.canonicalSMILES(getLayer3Equivalent(m0)); -// -// -// return hashChain45 (molstr1, molstr2, molstr3, molstr); - - - - Molecule m1 = m0.cloneMolecule(); - int[] atno = new int[m0.getAtomCount()]; - for (int i = 0; i < atno.length; ++i) { - MolAtom a = m0.getAtom(i); - atno[i] = a.getAtno(); - a.setAtno(6); - a.setRadical(0); - a.setCharge(0); - a.setFlags(0); - a.setMassno(0); - } - - for (MolBond b : m0.getBondArray()) { - b.setFlags(0); - b.setType(1); - } - - // level0: molecular skeleton... - String level0 = ChemUtil.canonicalSMILES (m0, false); - - StringBuilder sb = new StringBuilder (); - // level1: topology+atom label - - - int[] fallbackLookup = new int[atno.length]; - try{ - //set the tie-breaking priority based on the layer-3 information - Molecule stdLychi3Mol=getLayer3Equivalent(m1); - MolAtom[] matarr1=stdLychi3Mol.getAtomArray(); - - for (int i = 0; i < atno.length; ++i) { - fallbackLookup[matarr1[i].getAtomMap()-1]=i; - } - }catch(Exception e){ - logger.log(Level.SEVERE, - "Can't produce simplified structure from molecule", e); - } - -// int[] rank = new int[atno.length]; -// m0.getGrinv(rank); - - - int[] rank; - { - int MAX_ROUND = 13; - int[][] hash = new int[MAX_ROUND][atno.length]; - for (int i = 0; i < atno.length; ++i) - hash[0][i] = 1; - - int round = 1; - for (; round < MAX_ROUND; ++round) { - int p = round - 1; - for (int i = 0; i < atno.length; ++i) { - MolAtom a = m0.getAtom(i); - int ha = hash[p][i]; - for (int j = 0; j < a.getBondCount(); ++j) { - MolAtom xa = a.getBond(j).getOtherAtom(a); - int k = m0.indexOf(xa); - ha += (1 << xa.getBondCount()) + hash[p][k]; - } - if (ha < 0) { - if (DEBUG) { - logger.log(Level.SEVERE, - "OVERFLOW AT ITERATION "+round+"!"); - } - ha = hash[round-1][i]; - } - hash[round][i] = ha; - } - normalizeRanks(hash[round]); - } - rank = hash[round-1]; - } - + String molstr1=morgansAsString(getLayer1Equivalent(m0)); + String molstr2=morgansAsString(getLayer2Equivalent(m0)); + String molstr3=morgansAsString(getLayer3Equivalent(m0)); - for (int i = 0; i < atno.length; ++i) { - rank[i] = (rank[i]*2048); // update rank to resolve symmetry - rank[i] += fallbackLookup[i]; //tie breaking based on lychi-3 fallback order - } - // System.out.println(Arrays.toString(rank)); - - - - for (AtomIterator ai = new AtomIterator (m0, rank); - ai.hasNext(); ai.next()) { - int index = ai.nextIndex(); - sb.append(MolAtom.symbolOf(atno[index])); - } - String level1 = sb.toString(); - - // level2: topology+atom label+bond order - sb = new StringBuilder (); - for (AtomIterator ai =new AtomIterator (m1, rank); ai.hasNext(); ) { - MolAtom a = ai.next(); - - sb.append(a.getSymbol()+a.getImplicitHcount()); - - } - // level1: skeleton with atom label - String level2 = sb.toString(); - - // level2: full canonical smiles with stereo/isotope/charge... - String level3 = molstr; - if (DEBUG) { - logger.info("hash layers:\n"+ - "0: "+level0 + "\n"+ - "1: "+level1 + "\n"+ - "2: "+level2 + "\n"+ - "3: "+level3 + "\n"); - } - return hashChain45 (level0, level1, level2, level3); + return hashChain45 (molstr1, molstr2, molstr3, molstr); } static String[] hashChain45 (String... strs) { diff --git a/src/test/java/lychi/LychiRegressionTest.java b/src/test/java/lychi/LychiRegressionTest.java index f73c5fa..eb9f09b 100644 --- a/src/test/java/lychi/LychiRegressionTest.java +++ b/src/test/java/lychi/LychiRegressionTest.java @@ -213,6 +213,13 @@ public static List data(){ //CC1CCC=NC1.CN2CCCCC2 + //C1CNC=NC1 null 89PQ2A6F3-3YP3KUHJB2-327C672T4UY-32Y8FMA9NT4T + //C1CC=NCN1 null 89PQ2A6F3-3Y4RF1LMSJ-3JHRC7TP268-3J8QGY5AGS5T + + tests.add(LychiTestInstance.equivalent("C1CNC=NC1","C1CC=NCN1") + .layer(2) + .name("Bond order migration on symmetrical structure gives same layer 2")); + tests.add(LychiTestInstance.equivalent("CC1CCC=NC1","CN2CCCCC2") .layer(1) .name("Bond order and label change has same layer 1")); From d3b8afee2894435d5a9cff18950e65517812d58b Mon Sep 17 00:00:00 2001 From: Tyler Peryea Date: Wed, 8 May 2019 18:09:53 -0400 Subject: [PATCH 23/25] fixed symmetry check for cages --- src/main/java/lychi/LyChIStandardizer.java | 81 +- src/test/java/lychi/LychiRegressionTest.java | 1108 ++++++++++-------- 2 files changed, 678 insertions(+), 511 deletions(-) diff --git a/src/main/java/lychi/LyChIStandardizer.java b/src/main/java/lychi/LyChIStandardizer.java index a19b40e..f3eb801 100644 --- a/src/main/java/lychi/LyChIStandardizer.java +++ b/src/main/java/lychi/LyChIStandardizer.java @@ -15,6 +15,7 @@ import java.util.HashMap; import java.util.HashSet; import java.util.LinkedHashMap; +import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Set; @@ -1186,38 +1187,66 @@ else if (chiral != 0) { if(!"true".equals(igprop)){ m.setProperty("IGNORE_COMPLEX", "true"); - Set rings = new HashSet(); + Set rings = new LinkedHashSet(); + + int maxRing=0; - int[][] sssr=m.getSSSR(); for(MolAtom ma:nonChiralStereo.keySet()){ + maxRing=Math.max(maxRing, ma.sringsize()); + } + + int[][] sssr=m.getNonAromrings(maxRing); + for(MolAtom ma:nonChiralStereo.keySet()){ + int mm =ma.sringsize(); //need to find all atoms in the ring int im=m.indexOf(ma); - for(int[] ir:sssr){ - for(int i=0;i ratoms=Arrays.stream(rr) - .mapToObj(i->m.getAtom(i)) - .collect(Collectors.toSet()); + .mapToObj(i->m.getAtom(i)) + .collect(Collectors.toCollection(()->new LinkedHashSet())); MolBond[] bonds=ratoms.stream() .filter(a->!chirality.containsKey(a)) - .flatMap(a->IntStream.range(0, a.getEdgeCount()).mapToObj(i->a.getEdge(i))) - .filter(e->!ratoms.contains(e.getNode1()) || !ratoms.contains(e.getNode2())) - .map(b->(MolBond)b) - .filter(b->b.getType()==1) - .peek(b->{ - if(ratoms.contains(b.getAtom1()))b.swap(); - }) - .toArray(i->new MolBond[i]); - + .flatMap(a->IntStream.range(0, a.getEdgeCount()).mapToObj(i->a.getEdge(i))) + .filter(e->!ratoms.contains(e.getNode1()) || !ratoms.contains(e.getNode2())) + .map(b->(MolBond)b) + .filter(b->b.getType()==1) + .peek(b->{ + if(ratoms.contains(b.getAtom2()))b.swap(); + }) + .toArray(i->new MolBond[i]); + + + MolBond[] bondsInRing=ratoms.stream() + .filter(a->!chirality.containsKey(a)) + .flatMap(a->IntStream.range(0, a.getEdgeCount()).mapToObj(i->a.getEdge(i))) + .filter(e->{ + boolean inRing=ratoms.contains(e.getNode1()) && ratoms.contains(e.getNode2()); + return inRing; + }) + .map(b->(MolBond)b) + .filter(b->b.getType()==1) + .toArray(i->new MolBond[i]); + + + int[] oldPar = new int[bondsInRing.length]; + + for(int i=0;i>j&1)==1){ + if(((i>>j)&1)==1){ onOff.set(j*2); bonds[j].setFlags(MolBond.UP, MolBond.STEREO1_MASK); }else{ @@ -1249,9 +1278,12 @@ else if (chiral != 0) { Molecule mclone=m.cloneMolecule(); //(new LyChIStandardizer()).standardize(mclone); String hash1=LyChIStandardizer.hashKey(mclone); + allPossible.add(hash1); onOff.or(bs); + //System.out.println(mclone.toFormat("mol")); + if(onOff.cardinality() == bs.cardinality()){ currentPossible.add(hash1); } @@ -1273,7 +1305,10 @@ else if (chiral != 0) { bonds[j].setFlags(0, MolBond.STEREO1_MASK); } } - } + } + for(int i=0;i iatoms=IntStream.range(0, m.getAtomCount()).mapToObj(i2->i2).collect(Collectors.toList()); - Collections.shuffle(iatoms, r); - int[] map =iatoms.stream().mapToInt(i1->i1).toArray(); - Molecule s=shuffleMolecule(m,map); - - basicTest(s,spec.expectedLychi,spec.shouldMatch, spec.layer); - } - } - - //@Test - public void daisyChainLychiAfter10Times() throws Exception{ - Molecule m=spec.getMolecule(); - m.clean(2, null); - for (int i=0;i<10;i++){ - - List iatoms=IntStream.range(0, m.getAtomCount()).mapToObj(i2->i2).collect(Collectors.toList()); - Collections.shuffle(iatoms); - int[] map =iatoms.stream().mapToInt(i1->i1).toArray(); - Molecule s=shuffleMolecule(m,map); - basicTest(s,spec.expectedLychi,spec.shouldMatch, spec.layer); - m=s; - } - } - - - @Parameterized.Parameters(name = "{0}") - public static List data(){ - List tests = new ArrayList<>(); - -// tests.add(LychiTestInstance.of("CCCCCC","U28WVSD82-2YWKXKS36P-2PQPUKLGUWW-2PWK7BQNJSU6").name("simple carbon chain")); -// tests.add(LychiTestInstance.of("O=C(O[C@H]1C[C@H]2C[C@H]3C[C@@H](C1)N2CC3=O)C4=CNC5=C4C=CC=C5","38C4U16JU-UC5KDUPMVH-UHFJLJL661C-UHCRHDK74DXU").name("cage-like structure")); -// tests.add(LychiTestInstance.of("CN(C)CCOC(C1=CC=CC=C1)C2=CC=CC=C2","SG1MX4TJL-LRQMG7F9KY-LYVJD4DSRGU-LYU23YRCSQTR").name("test lychi change")); -// -// tests.add(LychiTestInstance.of("[Na+].[Na+].[Na+].[Na+].[O-]P([O-])(=O)OP([O-])([O-])=O", "U42VPKYB8-83HRLLLGLV-8VMGB3AAA1L-8VLXF4WDFH73") -// .name("legacy consistency test 1")); -// tests.add(LychiTestInstance.of("CC(C)NCC(O)COC1=CC=C(CCOCC2CC2)C=C1", "19W74QJNW-WXMWMLXXWD-WDPBV6R9GFJ-WDJKLYLWS5JW") + public LychiRegressionTest(String ignored, LychiTestInstance spec){ + this.spec = spec; + } + + public static void basicTest(Molecule m, String expected, boolean match, int layerMatch) throws Exception{ + LyChIStandardizer std = new LyChIStandardizer(); + std.standardize(m); + String fullKey=LyChIStandardizer.hashKey(m); + + String layer = Arrays.stream(fullKey.split("-")) + .limit(layerMatch) + .collect(Collectors.joining("-")); + String expectedLayer = Arrays.stream(expected.split("-")) + .limit(layerMatch) + .collect(Collectors.joining("-")); + + if(match){ + assertEquals(expectedLayer,layer); + }else{ + assertNotEquals(expectedLayer,layer); + } + } + + public static Molecule shuffleMolecule(Molecule m, int[] map){ + MolAtom[] mas=m.getAtomArray(); + MolBond[] mbs=m.getBondArray(); + + Molecule m2=new Molecule(); + + int[] rmap = new int[map.length]; + + for(int i=0;i iatoms=IntStream.range(0, m.getAtomCount()).mapToObj(i2->i2).collect(Collectors.toList()); + Collections.shuffle(iatoms, r); + int[] map =iatoms.stream().mapToInt(i1->i1).toArray(); + Molecule s=shuffleMolecule(m,map); + + basicTest(s,spec.expectedLychi,spec.shouldMatch, spec.layer); + } + }catch(Throwable e){ + if(!spec.shouldFail()){ + throw e; + } + threw=true; + } + + assertEquals(spec.shouldFail(),threw); + } + + //@Test + public void daisyChainLychiAfter10Times() throws Exception{ + Molecule m=spec.getMolecule(); + m.clean(2, null); + for (int i=0;i<10;i++){ + + List iatoms=IntStream.range(0, m.getAtomCount()).mapToObj(i2->i2).collect(Collectors.toList()); + Collections.shuffle(iatoms); + int[] map =iatoms.stream().mapToInt(i1->i1).toArray(); + Molecule s=shuffleMolecule(m,map); + basicTest(s,spec.expectedLychi,spec.shouldMatch, spec.layer); + m=s; + } + } + + + @Parameterized.Parameters(name = "{0}") + public static List data(){ + List tests = new ArrayList<>(); + +// tests.add(LychiTestInstance.of("CCCCCC","U28WVSD82-2YWKXKS36P-2PQPUKLGUWW-2PWK7BQNJSU6").name("simple carbon chain")); +// tests.add(LychiTestInstance.of("O=C(O[C@H]1C[C@H]2C[C@H]3C[C@@H](C1)N2CC3=O)C4=CNC5=C4C=CC=C5","38C4U16JU-UC5KDUPMVH-UHFJLJL661C-UHCRHDK74DXU").name("cage-like structure")); +// tests.add(LychiTestInstance.of("CN(C)CCOC(C1=CC=CC=C1)C2=CC=CC=C2","SG1MX4TJL-LRQMG7F9KY-LYVJD4DSRGU-LYU23YRCSQTR").name("test lychi change")); +// +// tests.add(LychiTestInstance.of("[Na+].[Na+].[Na+].[Na+].[O-]P([O-])(=O)OP([O-])([O-])=O", "U42VPKYB8-83HRLLLGLV-8VMGB3AAA1L-8VLXF4WDFH73") +// .name("legacy consistency test 1")); +// tests.add(LychiTestInstance.of("CC(C)NCC(O)COC1=CC=C(CCOCC2CC2)C=C1", "19W74QJNW-WXMWMLXXWD-WDPBV6R9GFJ-WDJKLYLWS5JW") // .name("legacy consistency test 2")); -// tests.add(LychiTestInstance.of("CC1=C(CC(O)=O)C2=C(C=CC(F)=C2)\\C1=C/C3=CC=C(C=C3)S(C)(=O)=O", "4D13QHCQ6-6CKUM1H2QX-6XM1AWY81DJ-6XJV527T8L5X") -// .name("legacy consistency test 3")); - - - //C1CCNCC1.C2CCN=CC2 - - //CC1CCC=NC1.CN2CCCCC2 - - //C1CNC=NC1 null 89PQ2A6F3-3YP3KUHJB2-327C672T4UY-32Y8FMA9NT4T - //C1CC=NCN1 null 89PQ2A6F3-3Y4RF1LMSJ-3JHRC7TP268-3J8QGY5AGS5T +// tests.add(LychiTestInstance.of("CC1=C(CC(O)=O)C2=C(C=CC(F)=C2)\\C1=C/C3=CC=C(C=C3)S(C)(=O)=O", "4D13QHCQ6-6CKUM1H2QX-6XM1AWY81DJ-6XJV527T8L5X") +// .name("legacy consistency test 3")); + + + //C1CCNCC1.C2CCN=CC2 + + //CC1CCC=NC1.CN2CCCCC2 + + //C1CNC=NC1 null 89PQ2A6F3-3YP3KUHJB2-327C672T4UY-32Y8FMA9NT4T + //C1CC=NCN1 null 89PQ2A6F3-3Y4RF1LMSJ-3JHRC7TP268-3J8QGY5AGS5T - tests.add(LychiTestInstance.equivalent("C1CNC=NC1","C1CC=NCN1") - .layer(2) - .name("Bond order migration on symmetrical structure gives same layer 2")); - - tests.add(LychiTestInstance.equivalent("CC1CCC=NC1","CN2CCCCC2") - .layer(1) - .name("Bond order and label change has same layer 1")); - - - tests.add(LychiTestInstance.equivalent("C1CCNCC1","C2CCN=CC2") - .layer(2) - .name("Bond order change gives same layer 2")); - - - tests.add(LychiTestInstance.notEquivalent("C[C@@H]1CC[C@@H](C)CC1","C[C@H]1CC[C@@H](C)CC1").name("trans across ring")); - tests.add(LychiTestInstance.equivalentLayer3("C[C@@H]1CC[C@@H](C)CC1","C[C@H]1CC[C@@H](C)CC1").name("cis across ring")); - - - - tests.add(LychiTestInstance.equivalent("CC(C)(C)C1CCC2(CC1)CCN(CCCN3CCOCC3)CC2","NCGC00013953\n" + - " -IDBS- 1129050841\n\n" + - " 24 26 0 0 0 0 0 0 0 0999 V2000\n" + - " 0.2296 -3.5406 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " -0.5954 -3.5406 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " -1.4204 -3.5406 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " -0.5954 -4.3656 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " -0.5954 -2.7156 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " 0.1191 -2.3031 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " 0.1191 -1.4781 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " -0.5954 -1.0656 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " -1.3099 -1.4781 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " -1.3099 -2.3031 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " 0.1191 -0.6531 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " 0.1191 0.1719 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " -0.5954 0.5844 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " -0.5954 1.4094 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " 0.1191 1.8219 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " 0.1191 2.6469 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " 0.8335 3.0594 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " 1.5480 2.6469 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " 2.2625 3.0594 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " 2.2625 3.8844 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " 1.5480 4.2969 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " 0.8335 3.8844 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " -1.3099 0.1719 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " -1.3099 -0.6531 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " 1 2 1 0 0 0\n" + - " 2 3 1 0 0 0\n" + - " 2 4 1 0 0 0\n" + - " 5 2 1 1 0 0\n" + - " 5 6 1 0 0 0\n" + - " 6 7 1 0 0 0\n" + - " 8 7 1 6 0 0\n" + - " 8 9 1 0 0 0\n" + - " 9 10 1 0 0 0\n" + - " 5 10 1 0 0 0\n" + - " 8 11 1 0 0 0\n" + - " 11 12 1 0 0 0\n" + - " 12 13 1 0 0 0\n" + - " 13 14 1 0 0 0\n" + - " 14 15 1 0 0 0\n" + - " 15 16 1 0 0 0\n" + - " 16 17 1 0 0 0\n" + - " 17 18 1 0 0 0\n" + - " 18 19 1 0 0 0\n" + - " 19 20 1 0 0 0\n" + - " 20 21 1 0 0 0\n" + - " 21 22 1 0 0 0\n" + - " 17 22 1 0 0 0\n" + - " 13 23 1 0 0 0\n" + - " 23 24 1 0 0 0\n" + - " 8 24 1 0 0 0\n" + - "M END").name("spiro stereo without meaning should not change lychi")); - - - //tests.add(LychiTestInstance.equivalentLayer3("CC(C)(CO)[C@@H](O)C(=O)NCCC(O)=O","CC(C)(CO)[CH](O)C(=O)NCCC(O)=O").name("layer 3 the same when only stereo changes")); - //tests.add(LychiTestInstance.equivalentLayer3("CCCCCCCCCCCCCC.CC(C)(CO)[C@@H](O)C(=O)NCCC(O)=O","CCCCCCCCCCCCCC.CC(C)(CO)[CH](O)C(=O)NCCC(O)=O").name("rare salt should be stripped, regardless of stereo")); - - tests.add(LychiTestInstance.equivalent("[H][C@@]12[C@@H]3SC[C@]4(NCCC5=C4C=C(OC)C(O)=C5)C(=O)OC[C@H](N1[C@@H](O)[C@@H]6CC7=C([C@H]2N6C)C(O)=C(OC)C(C)=C7)C8=C9OCOC9=C(C)C(OC(C)=O)=C38", "[H][C@@]12[C@@H]3SC[C@]4(NCCC5=C4C=C(OC)C(O)=C5)C(=O)OC[C@H](N1[C@@H](O)[C@@H]6CC7=C([C@H]2N6C)C(O)=C(OC)C(C)=C7)C8=C9OCOC9=C(C)C(OC(C)=O)=C38") - .name("round trip problem")); - - - - - - tests.add(LychiTestInstance.equivalent("\n" + - " Ketcher 12201304332D 1 1.00000 0.00000 0\n" + - "\n" + - " 59 67 0 1 0 999 V2000\n" + - " -2.2321 -1.8660 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " -1.7321 -1.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " -2.5981 -0.5000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " -2.5981 0.5000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " -3.4641 1.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " -3.4641 2.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " -4.3301 2.5000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " -2.5981 2.5000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " -2.5981 3.5000 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " -3.4641 4.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " -1.7321 2.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " -0.8660 2.5000 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " -1.7321 1.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " -0.8660 0.5000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " -0.4740 1.2647 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " -1.7321 0.0000 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " -0.9071 -0.4750 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " 0.0000 1.0000 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " 0.0000 -1.0000 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " -0.8660 -1.5000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " -0.8660 -2.5000 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " 0.8660 -1.5000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " 0.8561 -2.3746 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " 2.4488 -3.1947 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " 4.5544 -3.0234 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " 5.3132 -1.2000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " 6.5741 -1.3179 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " 4.8632 0.2250 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " 4.0294 0.9234 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " 2.9488 1.1197 0.0000 S 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " 0.8660 0.5000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " 0.8811 1.3246 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " 1.7321 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " 2.5981 0.5000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " 2.4244 1.4848 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " 1.6097 2.0768 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " 0.7419 1.9858 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " 1.7927 2.9165 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " 3.4641 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " 3.9301 0.3000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " 3.4641 -1.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " 4.2072 -1.6691 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " 3.8005 -2.5827 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " 2.8060 -2.4781 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " 2.5981 -1.5000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " 1.7321 -1.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " 5.6506 -0.2222 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " 6.4172 0.1894 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " 6.4966 1.1232 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " 5.8342 1.6954 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " 6.0136 2.6792 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " 5.2512 3.3264 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " 5.4306 4.3102 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " 4.3096 2.9897 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " 3.5473 3.6370 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " 3.7266 4.6207 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " 4.1303 2.0060 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " 4.8676 1.3838 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " 2 1 1 1 0 0\n" + - " 2 3 1 0 0 0\n" + - " 3 4 1 0 0 0\n" + - " 4 5 1 0 0 0\n" + - " 5 6 2 0 0 0\n" + - " 6 7 1 0 0 0\n" + - " 6 8 1 0 0 0\n" + - " 8 9 1 0 0 0\n" + - " 9 10 1 0 0 0\n" + - " 8 11 2 0 0 0\n" + - " 11 12 1 0 0 0\n" + - " 11 13 1 0 0 0\n" + - " 4 13 2 0 0 0\n" + - " 13 14 1 0 0 0\n" + - " 14 15 1 1 0 0\n" + - " 14 16 1 0 0 0\n" + - " 2 16 1 0 0 0\n" + - " 16 17 1 0 0 0\n" + - " 14 18 1 0 0 0\n" + - " 18 19 1 1 0 0\n" + - " 18 20 1 0 0 0\n" + - " 20 21 1 0 0 0\n" + - " 2 21 1 0 0 0\n" + - " 21 22 1 1 0 0\n" + - " 20 23 1 0 0 0\n" + - " 23 24 1 1 0 0\n" + - " 23 25 1 0 0 0\n" + - " 25 26 1 0 0 0\n" + - " 26 27 1 0 0 0\n" + - " 27 28 2 0 0 0\n" + - " 29 27 1 0 0 0\n" + - " 29 30 1 6 0 0\n" + - " 30 31 1 0 0 0\n" + - " 31 32 1 0 0 0\n" + - " 18 32 1 0 0 0\n" + - " 32 33 1 1 0 0\n" + - " 32 34 1 0 0 0\n" + - " 34 35 1 0 0 0\n" + - " 35 36 1 0 0 0\n" + - " 36 37 1 0 0 0\n" + - " 37 38 1 0 0 0\n" + - " 37 39 2 0 0 0\n" + - " 35 40 2 0 0 0\n" + - " 40 41 1 0 0 0\n" + - " 40 42 1 0 0 0\n" + - " 42 43 1 0 0 0\n" + - " 43 44 1 0 0 0\n" + - " 44 45 1 0 0 0\n" + - " 45 46 1 0 0 0\n" + - " 42 46 2 0 0 0\n" + - " 46 47 1 0 0 0\n" + - " 23 47 1 0 0 0\n" + - " 34 47 2 0 0 0\n" + - " 29 48 1 0 0 0\n" + - " 48 49 1 0 0 0\n" + - " 49 50 1 0 0 0\n" + - " 50 51 1 0 0 0\n" + - " 51 52 1 0 0 0\n" + - " 52 53 2 0 0 0\n" + - " 53 54 1 0 0 0\n" + - " 53 55 1 0 0 0\n" + - " 55 56 1 0 0 0\n" + - " 56 57 1 0 0 0\n" + - " 55 58 2 0 0 0\n" + - " 58 59 1 0 0 0\n" + - " 29 59 1 0 0 0\n" + - " 51 59 2 0 0 0\n" + - "M END", "[H][C@@]12CC3=C(C(O)=C(OC)C(C)=C3)[C@@]([H])(N1C)[C@@]4([H])N([C@H]2O)[C@@]5([H])COC(=O)[C@]8(CS[C@]4([H])C6=C5C7=C(OCO7)C(C)=C6OC(C)=O)NCCC9=C8C=C(OC)C(O)=C9").name("strereo parity issue 1")); - //C(C)1CCC(C)CC1 - - tests.add(LychiTestInstance.equivalent("[C@H](C)1CCC(C)CC1","[C@@H](C)1CCC(C)CC1").name("meaningless stereo on a ring the same as opposite meaningless stereo on ring")); - - tests.add(LychiTestInstance.equivalent("C[C@H]1CC[C@@H](C)CC1","C[C@@H]1CC[C@H](C)CC1").name("opposite form of cis/trans on ring should be the same")); - - tests.add(LychiTestInstance.notEquivalent("C[C@H]1CC[C@@H](C)CC1","C[C@H]1CC[C@H](C)CC1").name("cis across ring is different from trans across ring")); - - - - - //O[C@H]1CC(O)CC(O)C1 - tests.add(LychiTestInstance.equivalent("O[C@H]1CC(O)CC(O)C1","O[C@@H]1CC(O)CC(O)C1") - .name("3-center, 1 specified meaningless center should be same as inverted")); - - - - //OC1C(O)C(O)C(O)[C@@H](O)[C@H]1O - tests.add(LychiTestInstance.equivalent("OC1C(O)C(O)C(O)[C@@H](O)[C@H]1O","OC1C(O)C(O)C(O)[C@H](O)[C@@H]1O").name("semi-meaningful symmetric stereo honored")); - tests.add(LychiTestInstance.notEquivalent("OC1C(O)C(O)C(O)[C@@H](O)[C@H]1O","OC1C(O)C(O)C(O)[C@@H](O)[C@@H]1O").name("distinct semi-meaningful symmetric stereo honored")); - - tests.add(LychiTestInstance.equivalent("OC1[C@H](O)[C@H](O)C1O","OC1[C@@H](O)[C@@H](O)C1O").name("4-center, 2 specified symmetric meaningful stereo should be same as inverted")); - tests.add(LychiTestInstance.notEquivalent("OC1[C@H](O)[C@H](O)C1O","OC1[C@@H](O)[C@H](O)C1O").name("4-center, 2 specified symmetric meaningful stereo should not be same as 1 center modified")); - - - - //OC1[C@H](O)[C@H](O)C1O - - //C[C@H]1OC(C)O[C@@H](C)O1 - //[#6][C@H]1C[C@@H]([#6])CC([#6])C1.[#6][C@H]2CC([#6])C[C@@H]([#6])C2 - tests.add(LychiTestInstance.equivalent("\n" + - " MJ150420 \n" + - "\n" + - " 8 8 0 0 0 0 0 0 0 0999 V2000\n" + - " -2.2656 0.8138 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " -2.9801 0.4013 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " -2.9801 -0.4237 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " -2.2656 -0.8361 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " -1.5511 -0.4237 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " -1.5511 0.4013 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " -0.8366 0.8138 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " -0.8366 -0.0111 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0\n" + - " 1 2 1 0 0 0 0\n" + - " 1 6 1 0 0 0 0\n" + - " 2 3 1 0 0 0 0\n" + - " 3 4 1 0 0 0 0\n" + - " 4 5 1 0 0 0 0\n" + - " 5 6 1 0 0 0 0\n" + - " 6 7 1 1 0 0 0\n" + - " 6 8 1 6 0 0 0\n" + - "M END","C1CCCCC1").name("meaningless stereo 1")); - - - tests.add(LychiTestInstance.equivalentLayer3("[H][C@@](O)(CO)[C@@]([H])(O)[C@]([H])(O)[C@@]([H])(O)C=O", "[H][C@](O)(C=O)[C@@]([H])(O)[C@]([H])(O)[C@]([H])(O)C([2H])([2H])O") - .name("Hydrogen Isotope Same Layer 3") - ); - + //OCCO[SH](=O)=O + - tests.add(LychiTestInstance.equivalentLayer3("[H][C@]1(CC(O)=O)CCC2=C1N(CC3=CC=C(Cl)C=C3)C4=C2C=C(F)C=C4S(C)(=O)=O", "CS(=O)(=O)C1=CC(F)=CC2=C1N(CC3=CC=C(Cl)C=C3)C4=C2CCC4CC(O)=O") + //this one breaks right now, but shouldn't + tests.add(LychiTestInstance.notEquivalent("\n" + + " JSDraw205071917562D\n" + + "\n" + + " 10 10 0 0 1 0 999 V2000\n" + + " 25.9490 -11.2060 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 25.9490 -9.6460 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 27.3001 -8.8660 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 27.3001 -7.3060 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 25.9490 -6.5260 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 24.5980 -7.3060 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 24.5980 -8.8660 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 25.9490 -4.9660 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 27.3001 -4.1860 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 24.5980 -4.1860 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 2 1 1 6 0 0 0\n" + + " 2 3 1 0 0 0 0\n" + + " 3 4 1 0 0 0 0\n" + + " 4 5 1 0 0 0 0\n" + + " 5 6 1 0 0 0 0\n" + + " 6 7 1 0 0 0 0\n" + + " 2 7 1 0 0 0 0\n" + + " 5 8 2 0 0 0 0\n" + + " 8 9 1 0 0 0 0\n" + + " 8 10 1 0 0 0 0\n" + + "M END", + "\n" + + " JSDraw205071918262D\n" + + "\n" + + " 10 10 0 0 1 0 999 V2000\n" + + " 25.9490 -11.2060 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 25.9490 -9.6460 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 27.3001 -8.8660 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 27.3001 -7.3060 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 25.9490 -6.5260 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 24.5980 -7.3060 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 24.5980 -8.8660 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 25.9490 -4.9660 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 27.3001 -4.1860 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 24.5980 -4.1860 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 2 1 1 1 0 0 0\n" + + " 2 3 1 0 0 0 0\n" + + " 3 4 1 0 0 0 0\n" + + " 4 5 1 0 0 0 0\n" + + " 5 6 1 0 0 0 0\n" + + " 6 7 1 0 0 0 0\n" + + " 2 7 1 0 0 0 0\n" + + " 5 8 2 0 0 0 0\n" + + " 8 9 1 0 0 0 0\n" + + " 8 10 1 0 0 0 0\n" + + "M END") + .layer(4) + .markToFail() + .name("stereo locked into configuration across ring with double bond should be real")); + + + //C[C@H]1CC[C@]2(CC1)CC[C@H](C)CC2 + tests.add(LychiTestInstance.notEquivalent("C[C@H]1CC[C@]2(CC1)CC[C@H](C)CC2", + "CC1CCC2(CC1)CCC(C)CC2") + .layer(4) + .markToFail() + .name("allene-like ring stereo should be honored")); + + + //C1[C@H]2C[C@@H]3C[C@H]1CC(C2)C3 + //C1C2CC3CC1CC(C2)C3 + + tests.add(LychiTestInstance.equivalent("C1[C@H]2C[C@@H]3C[C@H]1CC(C2)C3", + "C1C2CC3CC1CC(C2)C3") + .layer(4) + .name("adamantane stereo shouldn't be real")); + + + + // + //NC12CC3CC(C1)CC(O)(C3)C2 + tests.add(LychiTestInstance.notEquivalent("N[C@]12CC3C[C@@H](C1)C[C@@](O)(C3)C2", + "NC12CC3CC(C1)CC(O)(C3)C2") + .layer(4) + .name("substituted adamantane stereo should be real")); + + tests.add(LychiTestInstance.equivalent("CNCCC[C@@]12CC[C@@H](C=3C1=CC=CC3)C=4C2=CC=CC4", + "CNCCC[C@@]12CC[C@@H](C=3C1=CC=CC3)C=4C2=CC=CC4") + .layer(4) + .name("bridgehead should always be the same")); + + + tests.add(LychiTestInstance.equivalent("NNC(=O)C1=CC=NC=C1", + "NNC(=O)C1=CC=NC=C1.NC2=CC=C(C(O)=O)C(O)=C2") + .layer(3) + .name("aminosalycilate salt should be stripped")); + + + + + tests.add(LychiTestInstance.equivalent("OCCS(O)(=O)=O.OCCS(O)(=O)=O.NC(=N)C1=CC=C(OCCCCCOC2=CC=C(C=C2)C(N)=N)C=C1", + "OCCS(O)(=O)=O.NC(=N)C1=CC=C(OCCCCCOC2=CC=C(C=C2)C(N)=N)C=C1") + .layer(3) + .name("2 non-equivalent salt forms are not the same")); + + tests.add(LychiTestInstance.equivalent("C1CNC=NC1","C1CC=NCN1") + .layer(2) + .name("Bond order migration on symmetrical structure gives same layer 2")); + + tests.add(LychiTestInstance.equivalent("CC1CCC=NC1","CN2CCCCC2") + .layer(1) + .name("Bond order and label change has same layer 1")); + + + tests.add(LychiTestInstance.equivalent("C1CCNCC1","C2CCN=CC2") + .layer(2) + .name("Bond order change gives same layer 2")); + + + tests.add(LychiTestInstance.notEquivalent("C[C@@H]1CC[C@@H](C)CC1","C[C@H]1CC[C@@H](C)CC1").name("trans across ring")); + tests.add(LychiTestInstance.equivalentLayer3("C[C@@H]1CC[C@@H](C)CC1","C[C@H]1CC[C@@H](C)CC1").name("cis across ring")); + + + + tests.add(LychiTestInstance.equivalent("CC(C)(C)C1CCC2(CC1)CCN(CCCN3CCOCC3)CC2","NCGC00013953\n" + + " -IDBS- 1129050841\n\n" + + " 24 26 0 0 0 0 0 0 0 0999 V2000\n" + + " 0.2296 -3.5406 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -0.5954 -3.5406 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -1.4204 -3.5406 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -0.5954 -4.3656 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -0.5954 -2.7156 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 0.1191 -2.3031 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 0.1191 -1.4781 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -0.5954 -1.0656 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -1.3099 -1.4781 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -1.3099 -2.3031 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 0.1191 -0.6531 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 0.1191 0.1719 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -0.5954 0.5844 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -0.5954 1.4094 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 0.1191 1.8219 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 0.1191 2.6469 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 0.8335 3.0594 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 1.5480 2.6469 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 2.2625 3.0594 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 2.2625 3.8844 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 1.5480 4.2969 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 0.8335 3.8844 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -1.3099 0.1719 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -1.3099 -0.6531 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 1 2 1 0 0 0\n" + + " 2 3 1 0 0 0\n" + + " 2 4 1 0 0 0\n" + + " 5 2 1 1 0 0\n" + + " 5 6 1 0 0 0\n" + + " 6 7 1 0 0 0\n" + + " 8 7 1 6 0 0\n" + + " 8 9 1 0 0 0\n" + + " 9 10 1 0 0 0\n" + + " 5 10 1 0 0 0\n" + + " 8 11 1 0 0 0\n" + + " 11 12 1 0 0 0\n" + + " 12 13 1 0 0 0\n" + + " 13 14 1 0 0 0\n" + + " 14 15 1 0 0 0\n" + + " 15 16 1 0 0 0\n" + + " 16 17 1 0 0 0\n" + + " 17 18 1 0 0 0\n" + + " 18 19 1 0 0 0\n" + + " 19 20 1 0 0 0\n" + + " 20 21 1 0 0 0\n" + + " 21 22 1 0 0 0\n" + + " 17 22 1 0 0 0\n" + + " 13 23 1 0 0 0\n" + + " 23 24 1 0 0 0\n" + + " 8 24 1 0 0 0\n" + + "M END").name("spiro stereo without meaning should not change lychi")); + + + //tests.add(LychiTestInstance.equivalentLayer3("CC(C)(CO)[C@@H](O)C(=O)NCCC(O)=O","CC(C)(CO)[CH](O)C(=O)NCCC(O)=O").name("layer 3 the same when only stereo changes")); + //tests.add(LychiTestInstance.equivalentLayer3("CCCCCCCCCCCCCC.CC(C)(CO)[C@@H](O)C(=O)NCCC(O)=O","CCCCCCCCCCCCCC.CC(C)(CO)[CH](O)C(=O)NCCC(O)=O").name("rare salt should be stripped, regardless of stereo")); + + tests.add(LychiTestInstance.equivalent("[H][C@@]12[C@@H]3SC[C@]4(NCCC5=C4C=C(OC)C(O)=C5)C(=O)OC[C@H](N1[C@@H](O)[C@@H]6CC7=C([C@H]2N6C)C(O)=C(OC)C(C)=C7)C8=C9OCOC9=C(C)C(OC(C)=O)=C38", "[H][C@@]12[C@@H]3SC[C@]4(NCCC5=C4C=C(OC)C(O)=C5)C(=O)OC[C@H](N1[C@@H](O)[C@@H]6CC7=C([C@H]2N6C)C(O)=C(OC)C(C)=C7)C8=C9OCOC9=C(C)C(OC(C)=O)=C38") + .name("round trip problem")); + + + + + + tests.add(LychiTestInstance.equivalent("\n" + + " Ketcher 12201304332D 1 1.00000 0.00000 0\n" + + "\n" + + " 59 67 0 1 0 999 V2000\n" + + " -2.2321 -1.8660 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -1.7321 -1.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -2.5981 -0.5000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -2.5981 0.5000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -3.4641 1.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -3.4641 2.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -4.3301 2.5000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -2.5981 2.5000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -2.5981 3.5000 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -3.4641 4.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -1.7321 2.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -0.8660 2.5000 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -1.7321 1.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -0.8660 0.5000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -0.4740 1.2647 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -1.7321 0.0000 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -0.9071 -0.4750 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 0.0000 1.0000 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 0.0000 -1.0000 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -0.8660 -1.5000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -0.8660 -2.5000 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 0.8660 -1.5000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 0.8561 -2.3746 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 2.4488 -3.1947 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 4.5544 -3.0234 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 5.3132 -1.2000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 6.5741 -1.3179 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 4.8632 0.2250 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 4.0294 0.9234 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 2.9488 1.1197 0.0000 S 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 0.8660 0.5000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 0.8811 1.3246 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 1.7321 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 2.5981 0.5000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 2.4244 1.4848 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 1.6097 2.0768 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 0.7419 1.9858 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 1.7927 2.9165 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 3.4641 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 3.9301 0.3000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 3.4641 -1.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 4.2072 -1.6691 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 3.8005 -2.5827 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 2.8060 -2.4781 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 2.5981 -1.5000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 1.7321 -1.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 5.6506 -0.2222 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 6.4172 0.1894 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 6.4966 1.1232 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 5.8342 1.6954 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 6.0136 2.6792 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 5.2512 3.3264 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 5.4306 4.3102 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 4.3096 2.9897 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 3.5473 3.6370 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 3.7266 4.6207 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 4.1303 2.0060 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 4.8676 1.3838 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 2 1 1 1 0 0\n" + + " 2 3 1 0 0 0\n" + + " 3 4 1 0 0 0\n" + + " 4 5 1 0 0 0\n" + + " 5 6 2 0 0 0\n" + + " 6 7 1 0 0 0\n" + + " 6 8 1 0 0 0\n" + + " 8 9 1 0 0 0\n" + + " 9 10 1 0 0 0\n" + + " 8 11 2 0 0 0\n" + + " 11 12 1 0 0 0\n" + + " 11 13 1 0 0 0\n" + + " 4 13 2 0 0 0\n" + + " 13 14 1 0 0 0\n" + + " 14 15 1 1 0 0\n" + + " 14 16 1 0 0 0\n" + + " 2 16 1 0 0 0\n" + + " 16 17 1 0 0 0\n" + + " 14 18 1 0 0 0\n" + + " 18 19 1 1 0 0\n" + + " 18 20 1 0 0 0\n" + + " 20 21 1 0 0 0\n" + + " 2 21 1 0 0 0\n" + + " 21 22 1 1 0 0\n" + + " 20 23 1 0 0 0\n" + + " 23 24 1 1 0 0\n" + + " 23 25 1 0 0 0\n" + + " 25 26 1 0 0 0\n" + + " 26 27 1 0 0 0\n" + + " 27 28 2 0 0 0\n" + + " 29 27 1 0 0 0\n" + + " 29 30 1 6 0 0\n" + + " 30 31 1 0 0 0\n" + + " 31 32 1 0 0 0\n" + + " 18 32 1 0 0 0\n" + + " 32 33 1 1 0 0\n" + + " 32 34 1 0 0 0\n" + + " 34 35 1 0 0 0\n" + + " 35 36 1 0 0 0\n" + + " 36 37 1 0 0 0\n" + + " 37 38 1 0 0 0\n" + + " 37 39 2 0 0 0\n" + + " 35 40 2 0 0 0\n" + + " 40 41 1 0 0 0\n" + + " 40 42 1 0 0 0\n" + + " 42 43 1 0 0 0\n" + + " 43 44 1 0 0 0\n" + + " 44 45 1 0 0 0\n" + + " 45 46 1 0 0 0\n" + + " 42 46 2 0 0 0\n" + + " 46 47 1 0 0 0\n" + + " 23 47 1 0 0 0\n" + + " 34 47 2 0 0 0\n" + + " 29 48 1 0 0 0\n" + + " 48 49 1 0 0 0\n" + + " 49 50 1 0 0 0\n" + + " 50 51 1 0 0 0\n" + + " 51 52 1 0 0 0\n" + + " 52 53 2 0 0 0\n" + + " 53 54 1 0 0 0\n" + + " 53 55 1 0 0 0\n" + + " 55 56 1 0 0 0\n" + + " 56 57 1 0 0 0\n" + + " 55 58 2 0 0 0\n" + + " 58 59 1 0 0 0\n" + + " 29 59 1 0 0 0\n" + + " 51 59 2 0 0 0\n" + + "M END", "[H][C@@]12CC3=C(C(O)=C(OC)C(C)=C3)[C@@]([H])(N1C)[C@@]4([H])N([C@H]2O)[C@@]5([H])COC(=O)[C@]8(CS[C@]4([H])C6=C5C7=C(OCO7)C(C)=C6OC(C)=O)NCCC9=C8C=C(OC)C(O)=C9").name("strereo parity issue 1")); + //C(C)1CCC(C)CC1 + + tests.add(LychiTestInstance.equivalent("[C@H](C)1CCC(C)CC1","[C@@H](C)1CCC(C)CC1").name("meaningless stereo on a ring the same as opposite meaningless stereo on ring")); + + tests.add(LychiTestInstance.equivalent("C[C@H]1CC[C@@H](C)CC1","C[C@@H]1CC[C@H](C)CC1").name("opposite form of cis/trans on ring should be the same")); + + tests.add(LychiTestInstance.notEquivalent("C[C@H]1CC[C@@H](C)CC1","C[C@H]1CC[C@H](C)CC1").name("cis across ring is different from trans across ring")); + + + + + //O[C@H]1CC(O)CC(O)C1 + tests.add(LychiTestInstance.equivalent("O[C@H]1CC(O)CC(O)C1","O[C@@H]1CC(O)CC(O)C1") + .name("3-center, 1 specified meaningless center should be same as inverted")); + + + + //OC1C(O)C(O)C(O)[C@@H](O)[C@H]1O + tests.add(LychiTestInstance.equivalent("OC1C(O)C(O)C(O)[C@@H](O)[C@H]1O","OC1C(O)C(O)C(O)[C@H](O)[C@@H]1O").name("semi-meaningful symmetric stereo honored")); + tests.add(LychiTestInstance.notEquivalent("OC1C(O)C(O)C(O)[C@@H](O)[C@H]1O","OC1C(O)C(O)C(O)[C@@H](O)[C@@H]1O").name("distinct semi-meaningful symmetric stereo honored")); + + tests.add(LychiTestInstance.equivalent("OC1[C@H](O)[C@H](O)C1O","OC1[C@@H](O)[C@@H](O)C1O").name("4-center, 2 specified symmetric meaningful stereo should be same as inverted")); + tests.add(LychiTestInstance.notEquivalent("OC1[C@H](O)[C@H](O)C1O","OC1[C@@H](O)[C@H](O)C1O").name("4-center, 2 specified symmetric meaningful stereo should not be same as 1 center modified")); + + + + //OC1[C@H](O)[C@H](O)C1O + + //C[C@H]1OC(C)O[C@@H](C)O1 + //[#6][C@H]1C[C@@H]([#6])CC([#6])C1.[#6][C@H]2CC([#6])C[C@@H]([#6])C2 + tests.add(LychiTestInstance.equivalent("\n" + + " MJ150420 \n" + + "\n" + + " 8 8 0 0 0 0 0 0 0 0999 V2000\n" + + " -2.2656 0.8138 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -2.9801 0.4013 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -2.9801 -0.4237 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -2.2656 -0.8361 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -1.5511 -0.4237 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -1.5511 0.4013 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -0.8366 0.8138 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -0.8366 -0.0111 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 1 2 1 0 0 0 0\n" + + " 1 6 1 0 0 0 0\n" + + " 2 3 1 0 0 0 0\n" + + " 3 4 1 0 0 0 0\n" + + " 4 5 1 0 0 0 0\n" + + " 5 6 1 0 0 0 0\n" + + " 6 7 1 1 0 0 0\n" + + " 6 8 1 6 0 0 0\n" + + "M END","C1CCCCC1").name("meaningless stereo 1")); + + + tests.add(LychiTestInstance.equivalentLayer3("[H][C@@](O)(CO)[C@@]([H])(O)[C@]([H])(O)[C@@]([H])(O)C=O", "[H][C@](O)(C=O)[C@@]([H])(O)[C@]([H])(O)[C@]([H])(O)C([2H])([2H])O") + .name("Hydrogen Isotope Same Layer 3") + ); + + + tests.add(LychiTestInstance.equivalentLayer3("[H][C@]1(CC(O)=O)CCC2=C1N(CC3=CC=C(Cl)C=C3)C4=C2C=C(F)C=C4S(C)(=O)=O", "CS(=O)(=O)C1=CC(F)=CC2=C1N(CC3=CC=C(Cl)C=C3)C4=C2CCC4CC(O)=O") .name("Strange graph invariant problem") - ); - - - //tests.add(LychiTestInstance.of("[H][C@](C)(CC)[C@]([H])(NC(=O)[C@]([H])(CCC(O)=O)N=C(O)[C@]([H])(CCC(O)=O)N=C(O)COCCOCCNC(=O)C1=CC2=C(C=C1)C3(OC2=O)C4=C(OC5=C3C=CC(O)=C5)C=C(O)C=C4)C(=O)N[C@@]([H])(CCCC)C(O)=N[C@@]([H])(CCCN=C(N)N)C(O)=N[C@@]([H])(CCCN=C(N)N)C(O)=NCCCOCC(COCCCN=C(O)[C@]([H])(CCCN=C(N)N)N=C(O)[C@]([H])(CCCN=C(N)N)N=C(O)[C@]([H])(CCCC)NC(=O)[C@@]([H])(NC(=O)[C@]([H])(CCC(O)=O)N=C(O)[C@]([H])(CCC(O)=O)N=C(O)COCCOCCNC(=O)C6=CC7=C(C=C6)C8(OC7=O)C9=C(OC%10=C8C=CC(O)=C%10)C=C(O)C=C9)[C@@]([H])(C)CC)(COCCCN=C(O)[C@]([H])(CCCN=C(N)N)N=C(O)[C@]([H])(CCCN=C(N)N)N=C(O)[C@]([H])(CCCC)NC(=O)[C@@]([H])(NC(=O)[C@]([H])(CCC(O)=O)N=C(O)[C@]([H])(CCC(O)=O)N=C(O)COCCOCCNC(=O)C%11=CC%12=C(C=C%11)C%13(OC%12=O)C%14=C(OC%15=C%13C=CC(O)=C%15)C=C(O)C=C%14)[C@@]([H])(C)CC)N=C(N)O","PY2Z7DXNU-UTQVUB5614-U4T1XF2AQV3-U43YSFQF6PCQ").name("big structure")); - - //These are tests that don't pass currently, because they deal - //with complex symmetry, should be uncommented later - - tests.add(LychiTestInstance.equivalent("C[C@H]1C[C@@H](C)CC(C)C1","C[C@@H]1C[C@H](C)CC(C)C1") - .name("symmetric half-defined stereo should be the same")); - - tests.add(LychiTestInstance.equivalent("[C@H](C)1CCC(C)CC1","C(C)1CCC(C)CC1") + ); + + + //tests.add(LychiTestInstance.of("[H][C@](C)(CC)[C@]([H])(NC(=O)[C@]([H])(CCC(O)=O)N=C(O)[C@]([H])(CCC(O)=O)N=C(O)COCCOCCNC(=O)C1=CC2=C(C=C1)C3(OC2=O)C4=C(OC5=C3C=CC(O)=C5)C=C(O)C=C4)C(=O)N[C@@]([H])(CCCC)C(O)=N[C@@]([H])(CCCN=C(N)N)C(O)=N[C@@]([H])(CCCN=C(N)N)C(O)=NCCCOCC(COCCCN=C(O)[C@]([H])(CCCN=C(N)N)N=C(O)[C@]([H])(CCCN=C(N)N)N=C(O)[C@]([H])(CCCC)NC(=O)[C@@]([H])(NC(=O)[C@]([H])(CCC(O)=O)N=C(O)[C@]([H])(CCC(O)=O)N=C(O)COCCOCCNC(=O)C6=CC7=C(C=C6)C8(OC7=O)C9=C(OC%10=C8C=CC(O)=C%10)C=C(O)C=C9)[C@@]([H])(C)CC)(COCCCN=C(O)[C@]([H])(CCCN=C(N)N)N=C(O)[C@]([H])(CCCN=C(N)N)N=C(O)[C@]([H])(CCCC)NC(=O)[C@@]([H])(NC(=O)[C@]([H])(CCC(O)=O)N=C(O)[C@]([H])(CCC(O)=O)N=C(O)COCCOCCNC(=O)C%11=CC%12=C(C=C%11)C%13(OC%12=O)C%14=C(OC%15=C%13C=CC(O)=C%15)C=C(O)C=C%14)[C@@]([H])(C)CC)N=C(N)O","PY2Z7DXNU-UTQVUB5614-U4T1XF2AQV3-U43YSFQF6PCQ").name("big structure")); + + //These are tests that don't pass currently, because they deal + //with complex symmetry, should be uncommented later + + tests.add(LychiTestInstance.equivalent("C[C@H]1C[C@@H](C)CC(C)C1","C[C@@H]1C[C@H](C)CC(C)C1") + .name("symmetric half-defined stereo should be the same")); + + tests.add(LychiTestInstance.equivalent("[C@H](C)1CCC(C)CC1","C(C)1CCC(C)CC1") .name("meaningless stereo on a ring is the same as no stereo on a ring")); - - tests.add(LychiTestInstance.equivalent("C[C@H]1OC(C)O[C@@H](C)O1","CC1OC(C)OC(C)O1") - .name("meaningless stereo with 2 dashed bonds on ring shouldn't be honored")); - - tests.add(LychiTestInstance.equivalentLayer3("C1CN(CCN1)C2=CC=C(C=C2)C3=CN4N=CC(=C4N=C3)C5=CC=CC6=C5C=CC=N6","C1CN(CCN1)C2=CC=C(C=C2)C3=CN4N=CC(=C4N=C3)C5=CC=NC6=C5C=CC=C6") - .negate() + + tests.add(LychiTestInstance.equivalent("C[C@H]1OC(C)O[C@@H](C)O1","CC1OC(C)OC(C)O1") + .name("meaningless stereo with 2 dashed bonds on ring shouldn't be honored")); + + tests.add(LychiTestInstance.equivalentLayer3("C1CN(CCN1)C2=CC=C(C=C2)C3=CN4N=CC(=C4N=C3)C5=CC=CC6=C5C=CC=N6","C1CN(CCN1)C2=CC=C(C=C2)C3=CN4N=CC(=C4N=C3)C5=CC=NC6=C5C=CC=C6") + .negate() .name("Constitutional Isomer Not Equivalent on Layer 3")); - - //C1CN(CCN1)C2=CC=C(C=C2)C3=CN4N=CC(=C4N=C3)C5=CC=CC6=C5C=CC=N6.C1CN(CCN1)C2=CC=C(C=C2)C3=CN4N=CC(=C4N=C3)C5=CC=NC6=C5C=CC=C6 - - - - return tests.stream().map(ls->ls.asJunitInput()).collect(Collectors.toList()); - } + + //C1CN(CCN1)C2=CC=C(C=C2)C3=CN4N=CC(=C4N=C3)C5=CC=CC6=C5C=CC=N6.C1CN(CCN1)C2=CC=C(C=C2)C3=CN4N=CC(=C4N=C3)C5=CC=NC6=C5C=CC=C6 + + + + return tests.stream().map(ls->ls.asJunitInput()).collect(Collectors.toList()); + } } From 4bd471de8f90e7c74484fe541014af09ccedc64c Mon Sep 17 00:00:00 2001 From: Tyler Peryea Date: Mon, 13 May 2019 13:21:50 -0400 Subject: [PATCH 24/25] update to text layout --- src/test/java/lychi/LychiRegressionTest.java | 23 +++++++++++++------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/src/test/java/lychi/LychiRegressionTest.java b/src/test/java/lychi/LychiRegressionTest.java index fcac546..7c16f38 100644 --- a/src/test/java/lychi/LychiRegressionTest.java +++ b/src/test/java/lychi/LychiRegressionTest.java @@ -175,9 +175,9 @@ public void correctLychiFirstTime() throws Exception{ @Test public void correctLychiAfter10Times() throws Exception{ - for (int i=0;i<10;i++){ - correctLychiFirstTime(); - } + for (int i=0;i<10;i++){ + correctLychiFirstTime(); + } } @Test @@ -300,16 +300,23 @@ public static List data(){ " 8 10 1 0 0 0 0\n" + "M END") .layer(4) + .name("stereo locked into configuration across ring with double bond should be real") + + //It's set to fail for now .markToFail() - .name("stereo locked into configuration across ring with double bond should be real")); + + + ); //C[C@H]1CC[C@]2(CC1)CC[C@H](C)CC2 - tests.add(LychiTestInstance.notEquivalent("C[C@H]1CC[C@]2(CC1)CC[C@H](C)CC2", - "CC1CCC2(CC1)CCC(C)CC2") + tests.add(LychiTestInstance.notEquivalent("C[C@H]1CC[C@]2(CC1)CC[C@H](O)CC2", + "CC1CCC2(CC1)CCC(O)CC2") .layer(4) - .markToFail() - .name("allene-like ring stereo should be honored")); + .name("allene-like ring stereo should be honored") + //It's set to fail for now + .markToFail() + ); //C1[C@H]2C[C@@H]3C[C@H]1CC(C2)C3 From 851b9978c1a718c0d254f0c30db1b4c9b8af4a0f Mon Sep 17 00:00:00 2001 From: Tyler Peryea Date: Tue, 14 May 2019 11:18:50 -0400 Subject: [PATCH 25/25] change SSSR heuristic calculator --- src/main/java/lychi/LyChIStandardizer.java | 4 +- src/test/java/lychi/LychiRegressionTest.java | 138 ++++++++++++++++++- 2 files changed, 140 insertions(+), 2 deletions(-) diff --git a/src/main/java/lychi/LyChIStandardizer.java b/src/main/java/lychi/LyChIStandardizer.java index f3eb801..820bb79 100644 --- a/src/main/java/lychi/LyChIStandardizer.java +++ b/src/main/java/lychi/LyChIStandardizer.java @@ -1195,7 +1195,9 @@ else if (chiral != 0) { maxRing=Math.max(maxRing, ma.sringsize()); } - int[][] sssr=m.getNonAromrings(maxRing); + + + int[][] sssr=m.getSSSR(); for(MolAtom ma:nonChiralStereo.keySet()){ int mm =ma.sringsize(); //need to find all atoms in the ring diff --git a/src/test/java/lychi/LychiRegressionTest.java b/src/test/java/lychi/LychiRegressionTest.java index 7c16f38..3d34814 100644 --- a/src/test/java/lychi/LychiRegressionTest.java +++ b/src/test/java/lychi/LychiRegressionTest.java @@ -326,7 +326,143 @@ public static List data(){ "C1C2CC3CC1CC(C2)C3") .layer(4) .name("adamantane stereo shouldn't be real")); - + + tests.add(LychiTestInstance.notEquivalent(" \n" + + " Marvin 05141911062D \n" + + "\n" + + " 30 32 0 0 0 0 999 V2000\n" + + " 3.4771 -1.6225 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 3.4771 -0.7975 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 3.4771 0.0275 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 4.1916 0.4400 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 4.1916 1.2650 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 3.4771 1.6775 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 2.7626 1.2650 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 2.7626 0.4400 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 2.0482 0.0275 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 1.3337 0.4400 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 1.3337 1.2650 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 0.6192 1.6775 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -0.0953 1.2650 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -0.0953 0.4400 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -0.8097 0.0275 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -1.5242 0.4400 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -1.5242 1.2650 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -2.2387 1.6775 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -2.9531 1.2650 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -2.9531 0.4400 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -2.2387 0.0275 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -2.2387 -0.7975 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -2.9531 -1.2100 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -2.9531 -2.0350 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -3.6676 -2.4475 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -3.6676 -0.7975 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -1.5242 -1.2100 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -1.5242 -2.0350 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -0.8097 -2.4475 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 0.6192 0.0275 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 1 2 3 0 0 0 0\n" + + " 2 3 1 0 0 0 0\n" + + " 3 4 2 0 0 0 0\n" + + " 4 5 1 0 0 0 0\n" + + " 5 6 2 0 0 0 0\n" + + " 6 7 1 0 0 0 0\n" + + " 7 8 2 0 0 0 0\n" + + " 3 8 1 0 0 0 0\n" + + " 8 9 1 0 0 0 0\n" + + " 9 10 1 0 0 0 0\n" + + " 10 11 2 0 0 0 0\n" + + " 11 12 1 0 0 0 0\n" + + " 12 13 2 0 0 0 0\n" + + " 13 14 1 0 0 0 0\n" + + " 14 15 1 0 0 0 0\n" + + " 15 16 1 0 0 0 0\n" + + " 16 17 2 0 0 0 0\n" + + " 17 18 1 0 0 0 0\n" + + " 18 19 2 0 0 0 0\n" + + " 19 20 1 0 0 0 0\n" + + " 20 21 2 0 0 0 0\n" + + " 16 21 1 0 0 0 0\n" + + " 21 22 1 0 0 0 0\n" + + " 22 23 1 0 0 0 0\n" + + " 23 24 1 0 0 0 0\n" + + " 24 25 1 0 0 0 0\n" + + " 23 26 2 0 0 0 0\n" + + " 22 27 2 0 0 0 0\n" + + " 27 28 1 4 0 0 0\n" + + " 28 29 1 0 0 0 0\n" + + " 14 30 2 0 0 0 0\n" + + " 10 30 1 0 0 0 0\n" + + "M END", + " \n" + + " Marvin 05141911062D \n" + + "\n" + + " 30 32 0 0 0 0 999 V2000\n" + + " 3.4771 -1.6225 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 3.4771 -0.7975 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 3.4771 0.0275 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 4.1916 0.4400 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 4.1916 1.2650 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 3.4771 1.6775 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 2.7626 1.2650 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 2.7626 0.4400 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 2.0482 0.0275 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 1.3337 0.4400 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 1.3337 1.2650 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 0.6192 1.6775 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -0.0953 1.2650 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -0.0953 0.4400 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -0.8097 0.0275 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -1.5242 0.4400 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -1.5242 1.2650 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -2.2387 1.6775 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -2.9531 1.2650 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -2.9531 0.4400 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -2.2387 0.0275 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -2.2387 -0.7975 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -2.9531 -1.2100 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -2.9531 -2.0350 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -3.6676 -2.4475 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -3.6676 -0.7975 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -1.5242 -1.2100 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -1.5242 -2.0350 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " -0.8097 -2.4475 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 0.6192 0.0275 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" + + " 1 2 3 0 0 0 0\n" + + " 2 3 1 0 0 0 0\n" + + " 3 4 2 0 0 0 0\n" + + " 4 5 1 0 0 0 0\n" + + " 5 6 2 0 0 0 0\n" + + " 6 7 1 0 0 0 0\n" + + " 7 8 2 0 0 0 0\n" + + " 3 8 1 0 0 0 0\n" + + " 8 9 1 0 0 0 0\n" + + " 9 10 1 0 0 0 0\n" + + " 10 11 2 0 0 0 0\n" + + " 11 12 1 0 0 0 0\n" + + " 12 13 2 0 0 0 0\n" + + " 13 14 1 0 0 0 0\n" + + " 14 15 1 0 0 0 0\n" + + " 15 16 1 0 0 0 0\n" + + " 16 17 2 0 0 0 0\n" + + " 17 18 1 0 0 0 0\n" + + " 18 19 2 0 0 0 0\n" + + " 19 20 1 0 0 0 0\n" + + " 20 21 2 0 0 0 0\n" + + " 16 21 1 0 0 0 0\n" + + " 21 22 1 0 0 0 0\n" + + " 22 23 1 0 0 0 0\n" + + " 23 24 1 0 0 0 0\n" + + " 24 25 1 0 0 0 0\n" + + " 23 26 2 0 0 0 0\n" + + " 22 27 2 0 0 0 0\n" + + " 27 28 1 0 0 0 0\n" + + " 28 29 1 0 0 0 0\n" + + " 14 30 2 0 0 0 0\n" + + " 10 30 1 0 0 0 0\n" + + "M END") + .layer(4) + .name("wiggle bond on double bond geometry should be different than trans/cis")); //