From b5ddd09db6f43df5381388375113c9d0bf1c817e Mon Sep 17 00:00:00 2001 From: Ayah Helal Date: Thu, 14 Apr 2022 16:39:13 +0100 Subject: [PATCH 1/3] Addition of initial ensemble algorithms Features Added: Instance Bootstrapping Feature Bagging New Algorithms eAnt-Miner_(HMA) eAnt-Miner_(PB + HMA) ecAnt-Miner_(PB) --- .gitignore | 1 + .../EnsembleMixedAttributeAntMiner.java | 147 +++++++++++ .../EnsemblePittsburghContinuousAntMiner.java | 153 ++++++++++++ ...emblePittsburghMixedAttributeAntMiner.java | 158 ++++++++++++ .../algorithm/MixedAttributeAntMiner.java | 10 +- .../PittsburghContinuousAntMiner.java | 3 +- .../PittsburghMixedAttributeAntMiner.java | 6 +- .../classification/rule/SinglePassPruner.java | 10 +- src/main/java/myra/datamining/Dataset.java | 34 ++- .../java/myra/datamining/VariableArchive.java | 26 +- .../bagging/BaggingArchiveRuleFactory.java | 182 ++++++++++++++ .../bagging/BaggingLevelRuleFactory.java | 233 ++++++++++++++++++ .../myra/ensemble/bagging/EnsembleModel.java | 143 +++++++++++ .../myra/ensemble/bagging/FeatureBagging.java | 62 +++++ .../bagging/InstanceBootstrapping.java | 96 ++++++++ .../myra/rule/archive/ArchiveRuleFactory.java | 6 +- src/main/java/myra/rule/archive/Graph.java | 43 +++- src/main/java/myra/rule/archive/Variable.java | 33 +-- .../hierarchical/AUPRCTest.java | 4 +- .../myra/datamining/VariableArchiveTest.java | 18 +- .../java/myra/rule/archive/GraphTest.java | 2 +- 21 files changed, 1312 insertions(+), 58 deletions(-) create mode 100644 src/main/java/myra/algorithm/EnsembleMixedAttributeAntMiner.java create mode 100644 src/main/java/myra/algorithm/EnsemblePittsburghContinuousAntMiner.java create mode 100644 src/main/java/myra/algorithm/EnsemblePittsburghMixedAttributeAntMiner.java create mode 100644 src/main/java/myra/ensemble/bagging/BaggingArchiveRuleFactory.java create mode 100644 src/main/java/myra/ensemble/bagging/BaggingLevelRuleFactory.java create mode 100644 src/main/java/myra/ensemble/bagging/EnsembleModel.java 
create mode 100644 src/main/java/myra/ensemble/bagging/FeatureBagging.java create mode 100644 src/main/java/myra/ensemble/bagging/InstanceBootstrapping.java diff --git a/.gitignore b/.gitignore index b83d222..bef0727 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ /target/ +.idea diff --git a/src/main/java/myra/algorithm/EnsembleMixedAttributeAntMiner.java b/src/main/java/myra/algorithm/EnsembleMixedAttributeAntMiner.java new file mode 100644 index 0000000..ddea7c6 --- /dev/null +++ b/src/main/java/myra/algorithm/EnsembleMixedAttributeAntMiner.java @@ -0,0 +1,147 @@ +/* + * EnsembleMixedAttributeAntMiner.java + * (this file is part of MYRA) + * + * Copyright 2008-2025 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package myra.algorithm; + +import static myra.Config.CONFIG; +import static myra.Scheduler.COLONY_SIZE; +import static myra.ensemble.bagging.EnsembleModel.DEFAULT_COLONIES; +import static myra.ensemble.bagging.FeatureBagging.DEFAULT_FEATURE_BAGGING; +import static myra.ensemble.bagging.InstanceBootstrapping.DEFAULT_INSTANCE_BOOTSTRAPPING; +import static myra.rule.Heuristic.DEFAULT_HEURISTIC; +import static myra.rule.Heuristic.DYNAMIC_HEURISTIC; +import static myra.rule.irl.RuleFactory.DEFAULT_FACTORY; + +import java.util.ArrayList; +import java.util.Collection; + +import myra.Option; + +import myra.Config.ConfigKey; +import myra.Option.DoubleOption; +import myra.Option.IntegerOption; + +import myra.classification.ClassificationModel; +import myra.datamining.Dataset; +import myra.ensemble.bagging.BaggingArchiveRuleFactory; +import myra.ensemble.bagging.EnsembleModel; +import myra.ensemble.bagging.FeatureBagging; +import myra.ensemble.bagging.InstanceBootstrapping; + +import myra.rule.archive.Graph; +import myra.rule.irl.SequentialCovering; + +/** + * Executable class file for the eAnt-MinerHMA + * algorithm. 
+ * + * @author Ayah Helal + * @author James Brookhouse + */ +public class EnsembleMixedAttributeAntMiner extends MixedAttributeAntMiner { + + public static final ConfigKey DEFAULT_VALIDATION = new ConfigKey<>(); + + @Override + protected void defaults() { + super.defaults(); + + // configuration not set via command line + CONFIG.set(DEFAULT_FACTORY, new BaggingArchiveRuleFactory()); + + // default configuration values + CONFIG.set(DEFAULT_FEATURE_BAGGING, 0.4); + CONFIG.set(DEFAULT_INSTANCE_BOOTSTRAPPING, 0.4); + CONFIG.set(DEFAULT_VALIDATION, 0.2); + CONFIG.set(DEFAULT_COLONIES, 10); + CONFIG.set(COLONY_SIZE, 10); + } + + @Override + protected Collection> options() { + ArrayList> options = new ArrayList<>(); + + for (Option option : super.options()) { + if (option.getKey() != DEFAULT_HEURISTIC + && option.getKey() != DYNAMIC_HEURISTIC) { + options.add(option); + } + } + + // percentage of bagged features + options.add(new DoubleOption(DEFAULT_FEATURE_BAGGING, + "fb", + "specify the percentage of feature bagging %s", + "value")); + + // percentage of bagged features + options.add(new IntegerOption(DEFAULT_COLONIES, + "ec", + "specify the number of different colonies %s", + "value")); + + // percentage of bagged instances + options.add(new DoubleOption(DEFAULT_INSTANCE_BOOTSTRAPPING, + "ib", + "specify the percentage of instance bagging %s", + "value")); + + // percentage of bagged instances + options.add(new DoubleOption(DEFAULT_VALIDATION, + "va", + "specify the percentage of the dataset to be used for validation %s", + "value")); + + return options; + } + + @Override + public ClassificationModel train(Dataset dataset) { + int colonies = CONFIG.get(DEFAULT_COLONIES); + EnsembleModel model = new EnsembleModel(); + + for(int i=0 ; i < colonies;i++) { + Dataset set = InstanceBootstrapping.bootstrapping(dataset,true); + set.setMask(FeatureBagging.generateBaggingMask(dataset.attributes().length)); + SequentialCovering seco = new SequentialCovering(); + model.add(new 
ClassificationModel(seco.train(dataset, new Graph(dataset)))); + } + model.setQuality(dataset); + return new ClassificationModel(model); + } + + @Override + public String description() { + return "Ensemble Mixed-Attribute Ant-Miner"; + } + + /** + * Ant-MinerHMA entry point. + * + * @param args + * command-line arguments. + * + * @throws Exception + * If an error occurs — e.g., I/O error. + */ + public static void main(String[] args) throws Exception { + EnsembleMixedAttributeAntMiner algorithm = new EnsembleMixedAttributeAntMiner(); + algorithm.run(args); + } +} diff --git a/src/main/java/myra/algorithm/EnsemblePittsburghContinuousAntMiner.java b/src/main/java/myra/algorithm/EnsemblePittsburghContinuousAntMiner.java new file mode 100644 index 0000000..9841c76 --- /dev/null +++ b/src/main/java/myra/algorithm/EnsemblePittsburghContinuousAntMiner.java @@ -0,0 +1,153 @@ +/* + * EnsembleMixedAttributeAntMiner.java + * (this file is part of MYRA) + * + * Copyright 2008-2025 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package myra.algorithm; + +import static myra.Config.CONFIG; +import static myra.Scheduler.COLONY_SIZE; +import static myra.rule.Heuristic.DEFAULT_HEURISTIC; +import static myra.rule.Heuristic.DYNAMIC_HEURISTIC; + +import static myra.ensemble.bagging.EnsembleModel.DEFAULT_COLONIES; +import static myra.ensemble.bagging.FeatureBagging.DEFAULT_FEATURE_BAGGING; +import static myra.ensemble.bagging.InstanceBootstrapping.DEFAULT_INSTANCE_BOOTSTRAPPING; + +import java.util.ArrayList; +import java.util.Collection; + +import myra.IterativeActivity; +import myra.Option; +import myra.Scheduler; +import myra.Option.DoubleOption; +import myra.Option.IntegerOption; + +import myra.classification.ClassificationModel; + +import myra.datamining.Dataset; +import myra.ensemble.bagging.BaggingLevelRuleFactory; +import myra.ensemble.bagging.EnsembleModel; +import myra.ensemble.bagging.FeatureBagging; +import myra.ensemble.bagging.InstanceBootstrapping; +import myra.rule.RuleList; +import myra.rule.Graph; +import myra.rule.pittsburgh.FindRuleListActivity; +import myra.rule.pittsburgh.LevelPheromonePolicy; + +/** + * This class represents the ecAnt-MinerPB + * implementation, as described in the paper: + * + *
+ * @INPROCEEDINGS{Brookhouse2025ensemble,
+ *    author    = {J. Brookhouse and A. Helal and F.E.B. Otero},
+ *    title     = {An Ensemble Ant Colony Optimisation Algorithm with a Hybrid Pheromone Model for Learning Rule Lists},
+ *    booktitle = {Genetic and Evolutionary Computation Conference (GECCO '25)},
+ *    year      = {2025},
+ *    pages     = {1--9}
+ * }
+ * 
+ * + * @author Ayah Helal + * @author James Brookhouse + */ +public class EnsemblePittsburghContinuousAntMiner extends PittsburghContinuousAntMiner { + + @Override + protected void defaults() { + super.defaults(); + // default configuration values + CONFIG.set(DEFAULT_FEATURE_BAGGING, 0.4); + CONFIG.set(DEFAULT_INSTANCE_BOOTSTRAPPING, 0.4); + CONFIG.set(DEFAULT_COLONIES, 10); + CONFIG.set(COLONY_SIZE, 10); + } + + @Override + protected Collection> options() { + ArrayList> options = new ArrayList<>(); + + for (Option option : super.options()) { + if (option.getKey() != DEFAULT_HEURISTIC + && option.getKey() != DYNAMIC_HEURISTIC) { + options.add(option); + } + } + + // percentage of bagged features + options.add(new DoubleOption(DEFAULT_FEATURE_BAGGING, + "fb", + "specify the percentage of feature bagging %s", + "value")); + + // percentage of bagged features + options.add(new IntegerOption(DEFAULT_COLONIES, + "ec", + "specify the number of different colonies %s", + "value")); + + // percentage of bagged instances + options.add(new DoubleOption(DEFAULT_INSTANCE_BOOTSTRAPPING, + "ib", + "specify the percentage of instance bagging %s", + "value")); + return options; + } + + @Override + public ClassificationModel train(Dataset dataset) { + int colonies = CONFIG.get(DEFAULT_COLONIES); + EnsembleModel model = new EnsembleModel(); + + for(int i=0 ; i < colonies;i++) { + Dataset set = InstanceBootstrapping.bootstrapping(dataset,true); + set.setMask(FeatureBagging.generateBaggingMask(dataset.attributes().length)); + IterativeActivity activity = + new FindRuleListActivity(new Graph(dataset), + dataset, + new BaggingLevelRuleFactory(), + new LevelPheromonePolicy()); + + Scheduler scheduler = Scheduler.newInstance(1); + scheduler.setActivity(activity); + scheduler.run(); + model.add(new ClassificationModel(activity.getBest())); + } + model.setQuality(dataset); + return new ClassificationModel(model); + } + + @Override + public String description() { + return "Ensemble 
Pittsburgh Continuous-Attribute Ant-Miner"; + } + + /** + * ecAnt-MinerPB entry point. + * + * @param args + * command-line arguments. + * + * @throws Exception + * If an error occurs — e.g., I/O error. + */ + public static void main(String[] args) throws Exception { + EnsemblePittsburghContinuousAntMiner algorithm = new EnsemblePittsburghContinuousAntMiner(); + algorithm.run(args); + } +} diff --git a/src/main/java/myra/algorithm/EnsemblePittsburghMixedAttributeAntMiner.java b/src/main/java/myra/algorithm/EnsemblePittsburghMixedAttributeAntMiner.java new file mode 100644 index 0000000..6ddb1dc --- /dev/null +++ b/src/main/java/myra/algorithm/EnsemblePittsburghMixedAttributeAntMiner.java @@ -0,0 +1,158 @@ +/* + * EnsembleMixedAttributeAntMiner.java + * (this file is part of MYRA) + * + * Copyright 2008-2020 ahh209 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package myra.algorithm; + + +import static myra.Config.CONFIG; +import static myra.Scheduler.COLONY_SIZE; +import static myra.rule.Heuristic.DEFAULT_HEURISTIC; +import static myra.rule.Heuristic.DYNAMIC_HEURISTIC; + +import static myra.rule.irl.RuleFactory.DEFAULT_FACTORY; +import static myra.ensemble.bagging.EnsembleModel.DEFAULT_COLONIES; +import static myra.ensemble.bagging.FeatureBagging.DEFAULT_FEATURE_BAGGING; +import static myra.ensemble.bagging.InstanceBootstrapping.DEFAULT_INSTANCE_BOOTSTRAPPING; + +import java.util.ArrayList; +import java.util.Collection; + +import myra.IterativeActivity; +import myra.Option; +import myra.Scheduler; +import myra.Option.DoubleOption; +import myra.Option.IntegerOption; + +import myra.classification.ClassificationModel; + +import myra.datamining.Dataset; +import myra.ensemble.bagging.BaggingArchiveRuleFactory; +import myra.ensemble.bagging.EnsembleModel; +import myra.ensemble.bagging.FeatureBagging; +import myra.ensemble.bagging.InstanceBootstrapping; +import myra.rule.RuleList; +import myra.rule.archive.ArchivePheromonePolicy; +import myra.rule.archive.Graph; +import myra.rule.pittsburgh.FindRuleListActivity; + +/** + * This class represents the ecAnt-MinerPB + HMA + * implementation, as described in the paper: + * + *
+ * @INPROCEEDINGS{Brookhouse2025ensemble,
+ *    author    = {J. Brookhouse and A. Helal and F.E.B. Otero},
+ *    title     = {An Ensemble Ant Colony Optimisation Algorithm with a Hybrid Pheromone Model for Learning Rule Lists},
+ *    booktitle = {Genetic and Evolutionary Computation Conference (GECCO '25)},
+ *    year      = {2025},
+ *    pages     = {1--9}
+ * }
+ * 
+ * + * @author Ayah Helal + * @author James Brookhouse + */ +public class EnsemblePittsburghMixedAttributeAntMiner extends PittsburghMixedAttributeAntMiner { + + @Override + protected void defaults() { + super.defaults(); + // configuration not set via command line + CONFIG.set(DEFAULT_FACTORY, new BaggingArchiveRuleFactory()); + + // default configuration values + CONFIG.set(DEFAULT_FEATURE_BAGGING, 0.4); + CONFIG.set(DEFAULT_INSTANCE_BOOTSTRAPPING, 0.4); + CONFIG.set(DEFAULT_COLONIES, 10); + CONFIG.set(COLONY_SIZE, 10); + } + + @Override + protected Collection> options() { + ArrayList> options = new ArrayList<>(); + + for (Option option : super.options()) { + if (option.getKey() != DEFAULT_HEURISTIC + && option.getKey() != DYNAMIC_HEURISTIC) { + options.add(option); + } + } + + // percentage of bagged features + options.add(new DoubleOption(DEFAULT_FEATURE_BAGGING, + "fb", + "specify the percentage of feature bagging %s", + "value")); + + // percentage of bagged features + options.add(new IntegerOption(DEFAULT_COLONIES, + "ec", + "specify the number of different colonies %s", + "value")); + + // percentage of bagged instances + options.add(new DoubleOption(DEFAULT_INSTANCE_BOOTSTRAPPING, + "ib", + "specify the percentage of instance bagging %s", + "value")); + return options; + } + + @Override + public ClassificationModel train(Dataset dataset) { + int colonies = CONFIG.get(DEFAULT_COLONIES); + EnsembleModel model = new EnsembleModel(); + + for(int i=0 ; i < colonies;i++) { + Dataset set = InstanceBootstrapping.bootstrapping(dataset,true); + set.setMask(FeatureBagging.generateBaggingMask(dataset.attributes().length)); + IterativeActivity activity = + new FindRuleListActivity(new Graph(dataset), + set, + new BaggingArchiveRuleFactory(), + new ArchivePheromonePolicy()); + + Scheduler scheduler = Scheduler.newInstance(1); + scheduler.setActivity(activity); + scheduler.run(); + model.add(new ClassificationModel(activity.getBest())); + } + model.setQuality(dataset); 
+ return new ClassificationModel(model); + } + + @Override + public String description() { + return "Ensemble Pittsburgh Mixed-Attribute Ant-Miner"; + } + + /** + * ecAnt-MinerPB + HMA entry point. + * + * @param args + * command-line arguments. + * + * @throws Exception + * If an error occurs — e.g., I/O error. + */ + public static void main(String[] args) throws Exception { + EnsemblePittsburghMixedAttributeAntMiner algorithm = new EnsemblePittsburghMixedAttributeAntMiner(); + algorithm.run(args); + } +} diff --git a/src/main/java/myra/algorithm/MixedAttributeAntMiner.java b/src/main/java/myra/algorithm/MixedAttributeAntMiner.java index 2d56bd2..78b404c 100644 --- a/src/main/java/myra/algorithm/MixedAttributeAntMiner.java +++ b/src/main/java/myra/algorithm/MixedAttributeAntMiner.java @@ -20,6 +20,7 @@ package myra.algorithm; import static myra.Archive.ARCHIVE_SIZE; +import static myra.Scheduler.COLONY_SIZE; import static myra.Archive.DEFAULT_Q; import static myra.Archive.Q; import static myra.Config.CONFIG; @@ -33,6 +34,7 @@ import static myra.rule.pittsburgh.LevelPheromonePolicy.P_BEST; import static myra.rule.irl.RuleFactory.DEFAULT_FACTORY; import static myra.rule.irl.PheromonePolicy.DEFAULT_POLICY; +import static myra.IterativeActivity.MAX_ITERATIONS; import java.util.ArrayList; import java.util.Collection; @@ -43,6 +45,7 @@ import myra.classification.ClassificationModel; import myra.classification.rule.SinglePassPruner; import myra.datamining.Dataset; +import myra.rule.BacktrackPruner; import myra.rule.Heuristic; import myra.rule.Pruner; import myra.rule.archive.ArchivePheromonePolicy; @@ -79,7 +82,8 @@ protected void defaults() { super.defaults(); // configuration not set via command line - + CONFIG.set(COLONY_SIZE, 5); + CONFIG.set(MAX_ITERATIONS, 1000); CONFIG.set(P_BEST, 0.05); CONFIG.set(DEFAULT_HEURISTIC, new Heuristic.None()); CONFIG.set(DEFAULT_FACTORY, new ArchiveRuleFactory()); @@ -89,8 +93,8 @@ protected void defaults() { // default configuration 
values CONFIG.set(EVAPORATION_FACTOR, 0.9); - CONFIG.set(DEFAULT_PRUNER, new SinglePassPruner()); - CONFIG.set(ARCHIVE_SIZE, 20); + CONFIG.set(DEFAULT_PRUNER, new BacktrackPruner()); + CONFIG.set(ARCHIVE_SIZE, 39); CONFIG.set(Q, DEFAULT_Q); CONFIG.set(CONVERGENCE_SPEED, DEFAULT_CONVERGENCE_SPEED); } diff --git a/src/main/java/myra/algorithm/PittsburghContinuousAntMiner.java b/src/main/java/myra/algorithm/PittsburghContinuousAntMiner.java index 5b302d0..03b9b2c 100644 --- a/src/main/java/myra/algorithm/PittsburghContinuousAntMiner.java +++ b/src/main/java/myra/algorithm/PittsburghContinuousAntMiner.java @@ -65,6 +65,7 @@ import myra.classification.rule.MajorityAssignator; import myra.classification.rule.PessimisticAccuracy; import myra.classification.rule.RuleClassifier; +import myra.classification.rule.SinglePassPruner; import myra.classification.rule.function.Laplace; import myra.classification.rule.function.MEstimate; import myra.classification.rule.function.SensitivitySpecificity; @@ -131,7 +132,7 @@ protected void defaults() { // default configuration values - CONFIG.set(COLONY_SIZE, 5); + CONFIG.set(COLONY_SIZE, 10); CONFIG.set(MAX_ITERATIONS, 500); CONFIG.set(MINIMUM_CASES, 10); CONFIG.set(EVAPORATION_FACTOR, 0.9); diff --git a/src/main/java/myra/algorithm/PittsburghMixedAttributeAntMiner.java b/src/main/java/myra/algorithm/PittsburghMixedAttributeAntMiner.java index b0e1ba7..4fb18de 100644 --- a/src/main/java/myra/algorithm/PittsburghMixedAttributeAntMiner.java +++ b/src/main/java/myra/algorithm/PittsburghMixedAttributeAntMiner.java @@ -63,6 +63,7 @@ import myra.classification.rule.function.SensitivitySpecificity; import myra.datamining.Dataset; import myra.datamining.Model; +import myra.rule.BacktrackPruner; import myra.rule.Heuristic; import myra.rule.ListMeasure; import myra.rule.ListPruner; @@ -179,7 +180,8 @@ public void set(String value) { "specify the rule pruner %s", true, "method"); - pruner.add("single-pass", CONFIG.get(DEFAULT_PRUNER)); + 
pruner.add("singlepass", CONFIG.get(DEFAULT_PRUNER)); + pruner.add("backtrack", new BacktrackPruner()); pruner.add("none", new Pruner.None()); options.add(pruner); @@ -256,7 +258,7 @@ protected Model train(Dataset dataset) { @Override protected String description() { - return "Mixed-Attribute Ant-Miner"; + return "Pittsburgh Mixed-Attribute Ant-Miner"; } /** diff --git a/src/main/java/myra/classification/rule/SinglePassPruner.java b/src/main/java/myra/classification/rule/SinglePassPruner.java index f9a42d4..fa706bb 100644 --- a/src/main/java/myra/classification/rule/SinglePassPruner.java +++ b/src/main/java/myra/classification/rule/SinglePassPruner.java @@ -20,6 +20,7 @@ package myra.classification.rule; import static myra.Config.CONFIG; +import static myra.datamining.Algorithm.RANDOM_SEED; import static myra.datamining.Dataset.COVERED; import static myra.datamining.Dataset.NOT_COVERED; import static myra.datamining.Dataset.RULE_COVERED; @@ -184,10 +185,11 @@ private void reset(Dataset dataset, Rule rule) { // reset the covered instances Instance.mark(instances, RULE_COVERED, NOT_COVERED); - - for (int index : coverage.instances) { - instances[index].flag = RULE_COVERED; - } + + for (int index : coverage.instances) { + instances[index].flag = RULE_COVERED; + } + if (!dataset.isHierarchical()) { ClassificationRule r = (ClassificationRule) rule; diff --git a/src/main/java/myra/datamining/Dataset.java b/src/main/java/myra/datamining/Dataset.java index d34c6c0..d59dec0 100644 --- a/src/main/java/myra/datamining/Dataset.java +++ b/src/main/java/myra/datamining/Dataset.java @@ -31,6 +31,7 @@ import myra.classification.Classifier; import myra.classification.Label; import myra.datamining.Hierarchy.Node; +import myra.ensemble.bagging.FeatureBagging; /** * This class represents the data. @@ -98,12 +99,17 @@ public final class Dataset { * Instance labels for hierarchical/multi-label problems. 
*/ private Label[] labels; - + + /** + * Bagging mask + */ + private double[] baggingMasks; /** * Default constructor. */ public Dataset() { attributes = new Attribute[0]; + baggingMasks = new double[0]; instances = new double[0]; distribution = new double[0]; labels = new Label[0]; @@ -139,6 +145,15 @@ public Attribute getTarget() { public Attribute[] attributes() { return attributes; } + + /** + * Sets the attributes of the dataset. + * + * + */ + public void setAttributes(Attribute[] a) { + attributes = a; + } /** * Returns the attribute with the specified name. @@ -746,6 +761,23 @@ public void setHierarchy(Hierarchy hierarchy) { public boolean isHierarchical() { return (hierarchy != null); } + + /** + * set bagging mask + */ + public void setMask(double[] mask) { + this.baggingMasks = mask.clone(); + } + + /** + * return bagging mask + */ + public double[] getMask() { + this.baggingMasks = FeatureBagging.generateBaggingMask(attributes.length); + if(this.baggingMasks.length > 0) + return this.baggingMasks; + return null; + } /** * Struct-like class to hold the information about an instance. diff --git a/src/main/java/myra/datamining/VariableArchive.java b/src/main/java/myra/datamining/VariableArchive.java index 1c5ff9e..7c1c6bd 100644 --- a/src/main/java/myra/datamining/VariableArchive.java +++ b/src/main/java/myra/datamining/VariableArchive.java @@ -27,6 +27,7 @@ import myra.Archive.DefaultArchive; import myra.Config.ConfigKey; import myra.Weighable; +import myra.datamining.Dataset.Instance; /** * This class represents a local archive—i.e., a variable of a solution, @@ -65,7 +66,7 @@ public abstract class VariableArchive> * * @return an attribute condition. */ - public abstract E sample(); + public abstract E sample(Dataset dataset); /** * Adds the specified value to the archive. @@ -105,6 +106,11 @@ public static class Continuous extends VariableArchive { * The upper bound of values in the attribute domain. 
*/ private double upper; + + /** + * The index of the attribute in the dataset. + */ + private int index; /** * Default constructor. @@ -115,9 +121,10 @@ public static class Continuous extends VariableArchive { * the upper bound of values in the attribute domain. * */ - public Continuous(double lower, double upper) { + public Continuous(double lower, double upper, int index) { this.lower = lower; this.upper = upper; + this.index = index; precision = (int) Math.pow(10, CONFIG.get(PRECISION)); archive = @@ -130,13 +137,16 @@ public void add(Double value, double quality) { } @Override - public Double sample() { + public Double sample(Dataset dataset) { double sampled = 0.0; if (!archive.isFull()) { - sampled = (CONFIG.get(RANDOM_GENERATOR).nextDouble() - * (upper - lower)) + lower; + + double[] instance = dataset.get(CONFIG.get(RANDOM_GENERATOR).nextInt(dataset.size())); + sampled = instance[index]; + } else { + // roulette selection based on the weight of each value Comparable>[] solutions = archive.solutions(); @@ -164,7 +174,7 @@ public Double sample() { sampled = value(selected); } - + sampled = (int) (sampled * precision); return sampled / precision; } @@ -202,7 +212,7 @@ private Double value(int selected) { @Override public VariableArchive.Continuous clone() { - return new VariableArchive.Continuous(lower, upper); + return new VariableArchive.Continuous(lower, upper, index); } } @@ -239,7 +249,7 @@ public void add(Integer value, double quality) { } @Override - public Integer sample() { + public Integer sample(Dataset dataset) { if (!archive.isFull()) { // random sampling, since archive is not complete return CONFIG.get(RANDOM_GENERATOR).nextInt(length); diff --git a/src/main/java/myra/ensemble/bagging/BaggingArchiveRuleFactory.java b/src/main/java/myra/ensemble/bagging/BaggingArchiveRuleFactory.java new file mode 100644 index 0000000..d699958 --- /dev/null +++ b/src/main/java/myra/ensemble/bagging/BaggingArchiveRuleFactory.java @@ -0,0 +1,182 @@ +/* + * 
BaggingArchiveRuleFactory.java + * (this file is part of MYRA) + * + * Copyright 2008-2025 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package myra.ensemble.bagging; + +import static myra.Config.CONFIG; +import static myra.datamining.Algorithm.RANDOM_GENERATOR; +import static myra.rule.Graph.START_INDEX; + +import myra.datamining.Dataset; +import myra.datamining.Dataset.Instance; +import myra.rule.Rule; +import myra.rule.Graph.Entry; +import myra.rule.Rule.Term; +import myra.rule.archive.ArchiveRuleFactory; +import myra.rule.archive.Graph; +import myra.rule.archive.Graph.Vertex; +import myra.rule.irl.RuleFactory; + +/** + * @author ahh209 + */ +public class BaggingArchiveRuleFactory extends ArchiveRuleFactory implements RuleFactory { + + @Override + public Rule create(int level, + myra.rule.Graph graph, + Entry[] heuristic, + Dataset dataset, + Instance[] instances) { + + if (!Graph.class.isInstance(graph)) { + throw new IllegalArgumentException("Invalid graph class: " + + graph.getClass()); + } + + return this.create(level, (Graph) graph, heuristic, dataset, instances); + } + + /** + * Creates a classification rule. Note that this method does not determine + * the consequent of the rule. + * + * @param level + * the id (sequence) of the rule. + * @param graph + * the construction graph. + * @param heuristic + * the heuristic values. + * @param dataset + * the current dataset. + * @param instances + * the covered instances flag. 
+ * + * @return a classification rule. + */ + public Rule create(int level, + Graph graph, + Entry[] heuristic, + Dataset dataset, + Instance[] instances) { + // the rule being created (empty at the start) + Rule rule = Rule.newInstance(graph.size() / 2); + int previous = START_INDEX; + + double[] pheromone = new double[graph.size()]; + boolean[] incompatible = new boolean[graph.size()]; + incompatible[START_INDEX] = true; + + double[] baggingMask = dataset.getMask(); + + while (true) { + double total = 0.0; + Entry[] neighbours = graph.matrix()[previous]; + + // calculates the probability of visiting vertex i by + // multiplying the pheromone and heuristic information (only + // compatible vertices are considered) + // checking if the attribute is allowed based on the bagging mask + for (int i = 0; i < neighbours.length; i++) { + //relying on the lazy evaluation of the or statement to cope with attribute values of -1 which refer to the start/end nodes + if (!incompatible[i] && neighbours[i] != null && (graph.vertices()[i].attribute == -1 || baggingMask[graph.vertices()[i].attribute] == 1)) { + pheromone[i] = + neighbours[i].value(level) * heuristic[i].value(0); + + total += pheromone[i]; + } else { + pheromone[i] = 0.0; + } + } + + if (total == 0.0) { + // there are no compatible vertices, the creation process is stopped + break; + } + + // prepares the roulette by accumulation the probabilities, + // from 0 to 1 + double cumulative = 0.0; + + for (int i = 0; i < pheromone.length; i++) { + if (pheromone[i] > 0) { + pheromone[i] = cumulative + (pheromone[i] / total); + cumulative = pheromone[i]; + } + } + + for (int i = (pheromone.length - 1); i >= 0; i--) { + if (pheromone[i] > 0) { + pheromone[i] = 1.0; + break; + } + } + + // roulette selection + double slot = CONFIG.get(RANDOM_GENERATOR).nextDouble(); + int selected = Graph.END_INDEX; + + for (int i = 0; i < pheromone.length; i++) { + if (slot < pheromone[i]) { + selected = i; + break; + } + } + + if (selected 
== Graph.END_INDEX) { + break; + } + + Vertex vertex = graph.vertices()[selected]; + Term term = new Term(selected, vertex.condition(level, dataset)); + rule.push(term); + + previous = selected; + // make the vertex unavailable + incompatible[selected] = true; + } + + rule.compact(); + rule.apply(dataset, instances); + return rule; + } + + /** + * Create a classification rules. Note that this method will use pheromone + * values from the level 0 only. + * + * @param graph + * the construction graph. + * @param heuristic + * the heuristic values. + * @param dataset + * the current dataset. + * @param instances + * the covered instances flag. + * + * @return a classification rule. + */ + @Override + public Rule create(myra.rule.Graph graph, + Entry[] heuristic, + Dataset dataset, + Instance[] instances) { + return this.create(0, graph, heuristic, dataset, instances); + } +} diff --git a/src/main/java/myra/ensemble/bagging/BaggingLevelRuleFactory.java b/src/main/java/myra/ensemble/bagging/BaggingLevelRuleFactory.java new file mode 100644 index 0000000..b15f110 --- /dev/null +++ b/src/main/java/myra/ensemble/bagging/BaggingLevelRuleFactory.java @@ -0,0 +1,233 @@ +/* + * BaggingLevelRuleFactory.java + * (this file is part of MYRA) + * + * Copyright 2008-2025 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package myra.ensemble.bagging; + +import static myra.Config.CONFIG; +import static myra.datamining.Algorithm.RANDOM_GENERATOR; +import static myra.rule.Graph.START_INDEX; +import static myra.rule.Heuristic.DEFAULT_HEURISTIC; +import static myra.rule.Heuristic.DYNAMIC_HEURISTIC; + +import myra.datamining.Dataset; +import myra.datamining.IntervalBuilder; +import myra.datamining.Attribute.Condition; +import myra.datamining.Dataset.Instance; +import myra.rule.Graph; +import myra.rule.Graph.Entry; +import myra.rule.Graph.Vertex; +import myra.rule.Rule; +import myra.rule.Rule.Term; +import myra.rule.pittsburgh.LevelRuleFactory; + +/** + * This class is responsible for creating classification rules. + * + * @author Ayah Helal + * @author James Brookhouse + */ +public class BaggingLevelRuleFactory extends LevelRuleFactory{ + + /** + * Creates a classification rule. Note that this method does not determine + * the consequent of the rule. + * + * @param level + * the id (sequence) of the rule. + * @param graph + * the construction graph. + * @param heuristic + * the heuristic values. + * @param dataset + * the current dataset. + * @param instances + * the covered instances flag. + * + * @return a classification rule. 
+ */ + public Rule create(int level, + Graph graph, + Entry[] heuristic, + Dataset dataset, + Instance[] instances) { + // the rule must cover at least MINIMUM_CASES + final int minimum = CONFIG.get(IntervalBuilder.MINIMUM_CASES); + Term last = null; + + // the rule being created (empty at the start) + Rule rule = Rule.newInstance(graph.size() / 2); + int ruleCovered = rule.apply(dataset, instances); + int previous = START_INDEX; + + double[] pheromone = new double[graph.size()]; + boolean[] incompatible = new boolean[graph.size()]; + incompatible[START_INDEX] = true; + + + double[] baggingMask = dataset.getMask(); + // the rule creation process starts with an empty rule and adds new + // terms to the antecedent while the number of covered cases is greater + // than the minimum allowed and the diversity of the covered instances + // is greater than 1 + while (ruleCovered > minimum && rule.isDiverse()) { + int selected = -1; + + while (selected == -1) { + double total = 0.0; + Entry[] neighbours = graph.matrix()[previous]; + // the number of nominal neighbours + int nominal = 0; + + // calculates the probability of visiting vertex i by + // multiplying the pheromone and heuristic information (only + // compatible vertices are considered) + // checking if the attribute is allowed based on the bagging mask + for (int i = 0; i < neighbours.length; i++) { + if (!incompatible[i] && neighbours[i] != null && baggingMask[graph.vertices()[i].attribute] == 1) { + pheromone[i] = neighbours[i].value(level) + * heuristic[i].value(0); + + total += pheromone[i]; + + if (graph.vertices()[i].condition != null) { + nominal++; + } + } else { + pheromone[i] = 0.0; + } + } + + if (total == 0.0) { + // there are no compatible vertices, the creation process + // is stopped + break; + } else if (nominal == 0 && ruleCovered < (minimum * 2)) { + // if no nominal neighbour is available and the number of + // covered instances is smaller that 2 times the minimum, + // the creation is stopped 
since the dynamic discretisation + // won't be able to create intervals + break; + } + + // prepares the roulette by accumulation the probabilities, + // from 0 to 1 + double cumulative = 0.0; + + for (int i = 0; i < pheromone.length; i++) { + if (pheromone[i] > 0) { + pheromone[i] = cumulative + (pheromone[i] / total); + cumulative = pheromone[i]; + } + } + + for (int i = (pheromone.length - 1); i >= 0; i--) { + if (pheromone[i] > 0) { + pheromone[i] = 1.0; + break; + } + } + + // roulette selection + double slot = CONFIG.get(RANDOM_GENERATOR).nextDouble(); + + for (int i = 0; i < pheromone.length; i++) { + if (slot < pheromone[i]) { + selected = i; + break; + } + } + + Vertex vertex = graph.vertices()[selected]; + Condition condition = vertex.condition; + + if (vertex.condition == null) { + // continuous vertices do not have a condition, + // discretisation is required + condition = IntervalBuilder.singleton() + .single(dataset, instances, vertex.attribute); + } + + if (vertex.condition == null && condition == null) { + // the discretisation may not be able to produce an + // interval for the selected attribute + incompatible[selected] = true; + selected = -1; + } else { + last = new Term(selected, condition); + rule.push(last); + + Instance[] clone = Instance.copyOf(instances); + int currentCovered = rule.apply(dataset, clone); + + // a term is only added to the rule if it makes the rule + // cover a different number of instances, satisfying the + // the minimum limit + if (ruleCovered != currentCovered + && currentCovered >= minimum) { + for (int i = 0; i < graph.size(); i++) { + if (!incompatible[i] && graph + .vertices()[i].attribute == vertex.attribute) { + incompatible[i] = true; + } + } + + // copy the covered instances information to the + // original instances array + System.arraycopy(clone, 0, instances, 0, clone.length); + + previous = selected; + ruleCovered = currentCovered; + last = null; + + // recompute the heuristic information if we are + // 
using the dynamic heuristic + if (CONFIG.get(DYNAMIC_HEURISTIC)) { + heuristic = CONFIG.get(DEFAULT_HEURISTIC) + .compute(graph, + dataset, + instances, + incompatible); + } + } else { + // removed the last added term and marks the selected + // vertex as incompatible + rule.pop(); + incompatible[selected] = true; + selected = -1; + } + } + } + + if (selected == -1) { + // no vertex could be selected + break; + } + } + + rule.compact(); + + if (last != null) { + // determines the coverage information, since a term was added + // to the rule and later removed + rule.apply(dataset, instances); + } + + return rule; + } +} \ No newline at end of file diff --git a/src/main/java/myra/ensemble/bagging/EnsembleModel.java b/src/main/java/myra/ensemble/bagging/EnsembleModel.java new file mode 100644 index 0000000..3f6bfd1 --- /dev/null +++ b/src/main/java/myra/ensemble/bagging/EnsembleModel.java @@ -0,0 +1,143 @@ +/* + * EnsembleModel.java + * (this file is part of MYRA) + * + * Copyright 2008-2025 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package myra.ensemble.bagging; + + +import static myra.Config.CONFIG; + +import java.util.ArrayList; + +import myra.Config.ConfigKey; +import myra.classification.Accuracy; +import myra.classification.ClassificationModel; +import myra.classification.Label; +import myra.datamining.Dataset; +import myra.datamining.Model; +import myra.datamining.Prediction; + + +/** + * Base model for an ensemble classifier + * + * @author Ayah Helal + * @author James Brookhouse + */ +public class EnsembleModel implements Model { + + public static final ConfigKey DEFAULT_COLONIES = new ConfigKey<>(); + + /** + * The wrapped (classification) model. + */ + private ArrayList models; + double[] qualities; + + /** + * Default constructor for an EnsembleModel. + */ + public EnsembleModel() { + models = new ArrayList<>(CONFIG.get(DEFAULT_COLONIES)); + qualities = new double [(int)CONFIG.get(DEFAULT_COLONIES)]; + } + + /** + * Adds a model to the ensemble. + * + * @param model + * The model to be added. + */ + public void add(Model model) { + models.add(model); + } + + /** + * Uses the provided dataset to evaluate the models in the ensemble and assign a quality for each model. + * + * @param dataset + * The dataset to test on. 
+ */ + public void setQuality(Dataset dataset) { + for (int i = 0; i < models.size(); i++) { + ClassificationModel rules = new ClassificationModel(models.get(i)); + Accuracy measure = new Accuracy(); + double accuracy = measure.evaluate(dataset, rules).raw(); + qualities[i] = accuracy; + } + } + + /* + * (non-Javadoc) + * + * @see myra.datamining.Model#predict(myra.datamining.Dataset, int) + */ + @Override + public Prediction predict(Dataset dataset, int instance) { + double[] frequency = new double[dataset.classLength()]; + for (int i = 0; i < models.size(); i++) { + Label prediction = (Label) models.get(i).predict(dataset, instance); + frequency[prediction.value()] += qualities[i]; + } + double max = 0; + int selectedindex = 0; + for (int i = 0; i < frequency.length; i++) { + if (frequency[i] > max) { + selectedindex = i; + max = frequency[i]; + } + } + return new Label(dataset.getAttribute(dataset.classIndex()), selectedindex); + } + + /* + * (non-Javadoc) + * + * @see myra.datamining.Model#toString(myra.datamining.Dataset) + */ + @Override + public String toString(Dataset dataset) { + StringBuilder builder = new StringBuilder(); + for (int i = 0; i < models.size(); i++) { + builder.append("**** Model "); + builder.append(i); + builder.append(" ****\n"); + builder.append(models.get(i).toString(dataset)).append("\n"); + builder.append("******************************** \n"); + } + return builder.toString(); + } + + /* + * (non-Javadoc) + * + * @see myra.datamining.Model#export(myra.datamining.Dataset) + */ + @Override + public String export(Dataset dataset) { + StringBuilder builder = new StringBuilder(); + for (int i = 0; i < models.size(); i++) { + builder.append("**** Model "); + builder.append(i); + builder.append(" ****\n"); + builder.append(models.get(i).export(dataset)); + builder.append("********************************\n"); + } + return builder.toString(); + } +} diff --git a/src/main/java/myra/ensemble/bagging/FeatureBagging.java 
b/src/main/java/myra/ensemble/bagging/FeatureBagging.java new file mode 100644 index 0000000..dc9f4b0 --- /dev/null +++ b/src/main/java/myra/ensemble/bagging/FeatureBagging.java @@ -0,0 +1,62 @@ +/* + * FeatureBagging.java + * (this file is part of MYRA) + * + * Copyright 2008-2025 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package myra.ensemble.bagging; + +import static myra.Config.CONFIG; +import static myra.datamining.Algorithm.RANDOM_GENERATOR; + +import myra.Config.ConfigKey; + + +/** + * Class that will create a feature bag. No underlying dataset will be modified as this is an expensive process, + * instead a mask will be generated which will indicate if a feature has been turned on or off. 
+ * + * @author Ayah Helal + * @author James Brookhouse + */ +public class FeatureBagging { + + public static final ConfigKey DEFAULT_FEATURE_BAGGING = new ConfigKey<>(); + + /** + * Generates a bagging mask, each feature is turned on or off probabilistically based on the value set by + * DEFAULT_FEATURE_BAGGING + * @param attributesSize The number of attributes/features in the dataset + * @return An array of doubles, where 1 indicates a feature is active and 0 inactive + */ + public static double[] generateBaggingMask(int attributesSize) + { + double probability = CONFIG.get(DEFAULT_FEATURE_BAGGING); + if (probability > 1) { + probability = Math.sqrt(attributesSize - 1) / (attributesSize - 1); + } + + double[] mask = new double[attributesSize -1]; + + for(int i=0;i DEFAULT_INSTANCE_BOOTSTRAPPING = new ConfigKey<>(); + + /** + * Calls a bootstrapping method based on the value of the replacement boolean passed to it. + * @param data The dataset to sample. + * @param replacement If true, sampling will be done with replacement. + * @return The sampled data set. + */ + public static Dataset bootstrapping(Dataset data, boolean replacement) { + if(replacement) { + return bootstrappingWithRepeats(data); + } + return bootstrapping(data); + } + + /** + * Creates a new dataset by sampling the given dataset. The new dataset is a percentage of the original + * stored by the variable DEFAULT_INSTANCE_BOOTSTRAPPING. The sampling is done with replacements, so some + * instances may feature more than once. + * @param data The dataset to sample. + * @return The sampled data set. 
+ */ + public static Dataset bootstrappingWithRepeats(Dataset data) { + Dataset newdata = new Dataset(); + newdata.setAttributes(data.attributes()); + int length = data.size(); + double percentage = CONFIG.get(DEFAULT_INSTANCE_BOOTSTRAPPING); + int sampleSize = (int) Math.max(Math.ceil(length * percentage),1); + for (int i = 0; i < sampleSize; i++) { + newdata.add(data.get(CONFIG.get(RANDOM_GENERATOR).nextInt(length))); + } + return newdata; + } + + /** + * Creates a dataset by randomly sampling the given data set up to the percentage + * size given in DEFAULT_INSTANCE_BOOTSTRAPPING + * @param data The dataset to sample. + * @return The sampled data set. + */ + public static Dataset bootstrapping(Dataset data) { + Dataset newdata = new Dataset(); + newdata.setAttributes(data.attributes()); + boolean[] used = new boolean[data.size()]; + int length = data.size(); + double percentage = CONFIG.get(DEFAULT_INSTANCE_BOOTSTRAPPING); + int sampleSize = (int) Math.max(Math.ceil(length * percentage),1); + int random; + for (int i = 0; i < sampleSize; i++) { + do { + random = CONFIG.get(RANDOM_GENERATOR).nextInt(length); + } while (used[random]); + + newdata.add(data.get(random)); + used[random] = true; + } + return newdata; + } +} diff --git a/src/main/java/myra/rule/archive/ArchiveRuleFactory.java b/src/main/java/myra/rule/archive/ArchiveRuleFactory.java index df30858..b70f7d8 100644 --- a/src/main/java/myra/rule/archive/ArchiveRuleFactory.java +++ b/src/main/java/myra/rule/archive/ArchiveRuleFactory.java @@ -145,7 +145,7 @@ public Rule create(int level, } Vertex vertex = graph.vertices()[selected]; - Term term = new Term(selected, vertex.condition(level)); + Term term = new Term(selected, vertex.condition(level,dataset)); rule.push(term); previous = selected; @@ -154,7 +154,9 @@ public Rule create(int level, } rule.compact(); - + + rule.apply(dataset, instances); + return rule; } diff --git a/src/main/java/myra/rule/archive/Graph.java 
b/src/main/java/myra/rule/archive/Graph.java index 232c96d..95026a1 100644 --- a/src/main/java/myra/rule/archive/Graph.java +++ b/src/main/java/myra/rule/archive/Graph.java @@ -58,7 +58,27 @@ protected Graph() { */ public Graph(Dataset dataset) { Attribute[] attributes = dataset.attributes(); - vertices = new Vertex[attributes.length + 1]; + + + // the virtual start vertex + int termsCount = 1; + + // the last attribute is the class attribute, so we ignore it + for (int i = 0; i < (attributes.length - 1); i++) { + switch (attributes[i].getType()) { + case NOMINAL: + termsCount += attributes[i].values().length; + break; + + case CONTINUOUS: + termsCount++; + break; + } + } + + + + vertices = new Vertex[termsCount + 1]; // start and end virtual vertices vertices[START_INDEX] = new Vertex(null); vertices[END_INDEX] = new Vertex(null); @@ -68,19 +88,20 @@ public Graph(Dataset dataset) { for (int i = 0; i < (attributes.length - 1); i++) { switch (attributes[i].getType()) { case NOMINAL: { - Vertex v = - new Vertex(new Variable.Nominal(attributes[i].size())); - v.attribute = i; - - vertices[index] = v; - index++; + for (int j = 0; j < attributes[i].length(); j++) { + Vertex v = + new Vertex(new Variable.Nominal(j)); + v.attribute = i; + vertices[index] = v; + index++; + } break; } case CONTINUOUS: { Vertex v = new Vertex(new Variable.Continuous(attributes[i] - .lower(), attributes[i].upper())); + .lower(), attributes[i].upper(),i)); v.attribute = i; vertices[index] = v; @@ -159,13 +180,13 @@ public Vertex(Variable initial) { * * @return a new condition. 
*/ - public Condition condition(int level) { + public Condition condition(int level, Dataset dataset) { Condition condition = null; if (level < archive.length) { - condition = archive[level].sample(); + condition = archive[level].sample(dataset); } else { - condition = initial.sample(); + condition = initial.sample(dataset); } condition.attribute = attribute; diff --git a/src/main/java/myra/rule/archive/Variable.java b/src/main/java/myra/rule/archive/Variable.java index e093c88..ca0c934 100644 --- a/src/main/java/myra/rule/archive/Variable.java +++ b/src/main/java/myra/rule/archive/Variable.java @@ -24,6 +24,7 @@ import static myra.datamining.Attribute.LESS_THAN_OR_EQUAL_TO; import myra.datamining.Attribute.Condition; +import myra.datamining.Dataset; import myra.datamining.VariableArchive; /** @@ -39,7 +40,7 @@ public abstract class Variable implements Cloneable { * * @return a condition to this variable. */ - public abstract Condition sample(); + public abstract Condition sample(Dataset dataset); /** * Adds the specified condition to the archive. @@ -83,19 +84,19 @@ public static class Continuous extends Variable { * @param upper * upper bound for the sampling procedure. */ - public Continuous(double lower, double upper) { + public Continuous(double lower, double upper, int index) { operator = new VariableArchive.Categorical(2); - value = new VariableArchive.Continuous(lower, upper); + value = new VariableArchive.Continuous(lower, upper, index); } @Override - public Condition sample() { + public Condition sample(Dataset dataset) { Condition condition = new Condition(); condition.relation = - (operator.sample() == 0) ? LESS_THAN_OR_EQUAL_TO + (operator.sample(dataset) == 0) ? LESS_THAN_OR_EQUAL_TO : GREATER_THAN; - condition.value[0] = value.sample(); + condition.value[0] = value.sample(dataset); return condition; } @@ -126,37 +127,37 @@ public static class Nominal extends Variable { /** * Value archive. 
*/ - private VariableArchive.Categorical value; + private int value; /** * Default constructor. * - * @param length - * the number of different nominal values. + * @param value + * the value of the nominal attribute. */ - public Nominal(int length) { - value = new VariableArchive.Categorical(length); + public Nominal(int value) { + this.value = value; } @Override - public Condition sample() { + public Condition sample(Dataset dataset) { Condition condition = new Condition(); condition.relation = EQUAL_TO; - condition.value[0] = value.sample(); + condition.value[0] = value; return condition; } @Override public void add(Condition condition, double quality) { - value.add(Integer.valueOf((int) condition.value[0]), quality); - value.update(); + //value.add(Integer.valueOf((int) condition.value[0]), quality); + //value.update(); } @Override public Nominal clone() { Nominal clone = (Nominal) super.clone(); - clone.value = value.clone(); + clone.value = value;//.clone(); return clone; } } diff --git a/src/test/java/myra/classification/hierarchical/AUPRCTest.java b/src/test/java/myra/classification/hierarchical/AUPRCTest.java index 4fdbafd..9fb047d 100644 --- a/src/test/java/myra/classification/hierarchical/AUPRCTest.java +++ b/src/test/java/myra/classification/hierarchical/AUPRCTest.java @@ -25,7 +25,7 @@ /** * @author Fernando Esteban Barril Otero - */ +**/ public class AUPRCTest extends TestCase { public void testConfusion() { Curve curve = new Curve(2); @@ -35,4 +35,4 @@ public void testConfusion() { assertEquals(6, curve.size()); } -} \ No newline at end of file +} diff --git a/src/test/java/myra/datamining/VariableArchiveTest.java b/src/test/java/myra/datamining/VariableArchiveTest.java index 25878d9..a046993 100644 --- a/src/test/java/myra/datamining/VariableArchiveTest.java +++ b/src/test/java/myra/datamining/VariableArchiveTest.java @@ -29,6 +29,7 @@ import static myra.datamining.VariableArchive.DEFAULT_PRECISION; import static 
myra.datamining.VariableArchive.PRECISION; +import java.io.InputStreamReader; import java.util.Random; import junit.framework.TestCase; @@ -37,10 +38,13 @@ * @author Fernando Esteban Barril Otero */ public class VariableArchiveTest extends TestCase { - + private Dataset dataset; @Override protected void setUp() throws Exception { super.setUp(); + ARFFReader reader = new ARFFReader(); + dataset = reader.read(new InputStreamReader(getClass() + .getResourceAsStream("/weather.arff"))); CONFIG.set(ARCHIVE_SIZE, 5); CONFIG.set(Q, DEFAULT_Q); @@ -54,7 +58,7 @@ public void testCategoricalSampling() { new VariableArchive.Categorical(3); for (int i = 0; i < CONFIG.get(ARCHIVE_SIZE); i++) { - Integer value = archive.sample(); + Integer value = archive.sample(dataset); assertNotNull(value); double quality = CONFIG.get(RANDOM_GENERATOR).nextDouble(); @@ -65,17 +69,17 @@ public void testCategoricalSampling() { // archive is complete now - Integer value = archive.sample(); + Integer value = archive.sample(dataset); assertNotNull(value); } public void testContinuousSampling() { VariableArchive.Continuous archive = - new VariableArchive.Continuous(0, 10); + new VariableArchive.Continuous(0, 10, 1); for (int i = 0; i < CONFIG.get(ARCHIVE_SIZE); i++) { - Double value = archive.sample(); - assertTrue(value < 10.0); + Double value = archive.sample(dataset); + // assertTrue(value < 10.0); double quality = CONFIG.get(RANDOM_GENERATOR).nextDouble(); archive.add(value, quality); @@ -86,7 +90,7 @@ public void testContinuousSampling() { // archive is complete now for (int i = 0; i < CONFIG.get(ARCHIVE_SIZE); i++) { - archive.sample(); + archive.sample(dataset); } } } \ No newline at end of file diff --git a/src/test/java/myra/rule/archive/GraphTest.java b/src/test/java/myra/rule/archive/GraphTest.java index b02f400..ab247f8 100644 --- a/src/test/java/myra/rule/archive/GraphTest.java +++ b/src/test/java/myra/rule/archive/GraphTest.java @@ -56,6 +56,6 @@ public void testVertices() { 
assertEquals(6, graph.size()); Graph.Vertex vertex = graph.vertices()[2]; - assertNotNull(vertex.initial.sample()); + assertNotNull(vertex.initial.sample(dataset)); } } \ No newline at end of file From 6d92368a03bf508a88ef619226d508b22239a606 Mon Sep 17 00:00:00 2001 From: bunu Date: Mon, 10 Nov 2025 11:06:13 +0000 Subject: [PATCH 2/3] Added eAnt-Miner to readme, fixed spelling and citations in all files --- README.md | 30 +++++++++++++++---- ...emblePittsburghMixedAttributeAntMiner.java | 15 +++++----- .../myra/rule/irl/VertexPheromonePolicy.java | 2 +- 3 files changed, 33 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index e55053d..e21fac8 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,7 @@ The following algorithms are implemented: Main class: myra.algorithm.AntMiner ``` -The first rule induction ACO classification algorithm. Ant-Miner uses a sequentical covering strategy combined with an ACO search to create a list of rules. Ant-Miner only supports categorical attributes, continuous attributes need to be discretised in a pre-processing step. +The first rule induction ACO classification algorithm. Ant-Miner uses a sequential covering strategy combined with an ACO search to create a list of rules. Ant-Miner only supports categorical attributes, continuous attributes need to be discretised in a pre-processing step. ##### Continuous Ant-Miner (*c*Ant-Miner) ``` @@ -66,24 +66,29 @@ A decision tree induction algorithm that uses an ACO procedure to creates decisi Main class: myra.algorithm.AntMinerReg ``` -The first rule induction ACO regression algorithm. Ant-Miner-Reg uses a sequentical covering strategy combined with an ACO search to create a list of regression rules. +The first rule induction ACO regression algorithm. Ant-Miner-Reg uses a sequential covering strategy combined with an ACO search to create a list of regression rules. 
##### Hierarchical Multi-Label Ant-Miner (*hm*Ant-Miner) ``` Main class: myra.algorithm.HierarchicalMultiLabelAntMiner ``` -*hm*Ant-Miner is the first Ant-Miner variation for hierarchical multi-label classification problems. *hm*Ant-Miner uses a sequentical covering strategy combined with an ACO search to create a list of hierarchical classification rules that predict multiple class labels from a hierarchy. +*hm*Ant-Miner is the first Ant-Miner variation for hierarchical multi-label classification problems. *hm*Ant-Miner uses a sequential covering strategy combined with an ACO search to create a list of hierarchical classification rules that predict multiple class labels from a hierarchy. + +#### *e*Ant-MinerPB+HMA +``` +Main class: myra.algorithm.EnsemblePittsburghMixedAttributeAntMiner +``` ### Running the algorithms -All algorihtms can be used in the command line: +All algorithms can be used in the command line: ``` java -cp myra-.jar
-f ``` -where `` is MYRA version number (e.g., `5.0`), `
` is the main class name of the algorithm and `` is the path to the ARFF file to be used as training data. The minimum requirement to run an algorihtm is a training file. If no training file is specified, a list of options is printed: +where `` is MYRA version number (e.g., `5.0`), `
` is the main class name of the algorithm and `` is the path to the ARFF file to be used as training data. The minimum requirement to run an algorithm is a training file. If no training file is specified, a list of options is printed: ``` [febo@uok myra]$ java -cp myra-4.5.jar myra.algorithm.ContinuousAntMiner @@ -129,7 +134,7 @@ The following options are available: --parallel enable parallel execution in multiple cores; if no cores are specified, use all available cores ``` -Usinng command-line options you can tweak the parameters of an algorithm. Note that when running the algorithm in parallel (`--parallel` option), there is no guarantee that it will have the same behaviour even if the same seed value is used (`-s` option), since the thread allocation is not controlled by the code. +Using command-line options you can tweak the parameters of an algorithm. Note that when running the algorithm in parallel (`--parallel` option), there is no guarantee that it will have the same behaviour even if the same seed value is used (`-s` option), since the thread allocation is not controlled by the code. ### Citation Policy @@ -288,3 +293,16 @@ If you also would like to make a reference to the MYRA repository, please includ pages = {165–-181} } ``` + +#### Ensemble Ant-Miner (*e*Ant-MinerPB+HMA) +* J. Brookhouse, A. Helal and F.E.B. Otero. An Ensemble Ant Colony Optimisation Algorithm with a Hybrid Pheromone Model for Learning Rule Lists. In: Proceedings of the Genetic and Evolutionary Computation Conference (GECCO '25), pp. 1532–1539, 2025. +``` + @INPROCEEDINGS{Brookhouse2025ensemble, + author = {J. Brookhouse and A. Helal and F.E.B. 
Otero}, + title = {An Ensemble Ant Colony Optimisation Algorithm with a Hybrid Pheromone Model for Learning Rule Lists}, + booktitle = {Proceedings of the Genetic and Evolutionary Computation Conference (GECCO '25)}, + publisher = {Association for Computing Machinery}, + year = {2025}, + pages = {1532--1539} + } +``` diff --git a/src/main/java/myra/algorithm/EnsemblePittsburghMixedAttributeAntMiner.java b/src/main/java/myra/algorithm/EnsemblePittsburghMixedAttributeAntMiner.java index 6ddb1dc..7e6c4a2 100644 --- a/src/main/java/myra/algorithm/EnsemblePittsburghMixedAttributeAntMiner.java +++ b/src/main/java/myra/algorithm/EnsemblePittsburghMixedAttributeAntMiner.java @@ -52,16 +52,17 @@ import myra.rule.pittsburgh.FindRuleListActivity; /** - * This class represents the ecAnt-MinerPB + HMA + * This class represents the eAnt-MinerPB + HMA * implementation, as described in the paper: * *
- * @ARTICLE{Brookhouse2025ensemble,
- *    author  = {J. Brookhouse and A. Helal and F.E.B. Otero},
- *    title   = {An Ensemble Ant Colony Optimisation Algorithm with a Hybrid Pheromone Model for Learning Rule Lists},
- *    journal = {Genetic and Evolutionary Computation Conference (GECCO '25)},
- *    year    = {2025},
- *    pages   = {1--9}
+ * @INPROCEEDINGS{Brookhouse2025ensemble,
+ * author  = {J. Brookhouse and A. Helal and F.E.B. Otero},
+ * title   = {An Ensemble Ant Colony Optimisation Algorithm with a Hybrid Pheromone Model for Learning Rule Lists},
+ * booktitle = {Proceedings of the Genetic and Evolutionary Computation Conference (GECCO '25)},
+ * publisher = {Association for Computing Machinery}
+ * year    = {2025},
+ * pages   = {1532--1539}
  * }
  * 
* diff --git a/src/main/java/myra/rule/irl/VertexPheromonePolicy.java b/src/main/java/myra/rule/irl/VertexPheromonePolicy.java index d04f286..d4fb21d 100644 --- a/src/main/java/myra/rule/irl/VertexPheromonePolicy.java +++ b/src/main/java/myra/rule/irl/VertexPheromonePolicy.java @@ -74,7 +74,7 @@ public void update(Graph graph, Rule rule) { matrix[terms[i].index()][0].set(0, value + (value * q)); } - // normilises the pheromone values (it has the effect of + // Normalises the pheromone values (it has the effect of // evaporation for vertices that have not being updated) double total = 0.0; From 341aca05957c26fe8792a54def0d441416d7928a Mon Sep 17 00:00:00 2001 From: bunu Date: Tue, 11 Nov 2025 09:35:31 +0000 Subject: [PATCH 3/3] Bumped java version to latest LTS, bumped junit dependency due to CVE. --- pom.xml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pom.xml b/pom.xml index b3d52e1..4d0bf29 100644 --- a/pom.xml +++ b/pom.xml @@ -36,20 +36,20 @@ UTF-8 - 8 + 25 org.junit.jupiter junit-jupiter - 5.6.2 + 5.9.3 test org.junit.vintage junit-vintage-engine - 5.6.2 + 5.9.3 test