examples/early_stopping.py (new file: 155 additions)
from lightning.pytorch.callbacks import EarlyStopping
from torchmetrics import MetricCollection
from torchmetrics.classification import (
    BinaryAUROC,
    BinaryAveragePrecision,
    BinaryPrecision,
    BinaryRecall,
)

from hyperbench.data import AlgebraDataset, DataLoader, SamplingStrategy
from hyperbench.hlp import MLPHlpModule
from hyperbench.nn import LaplacianPositionalEncodingEnricher
from hyperbench.train import MultiModelTrainer, RandomNegativeSampler
from hyperbench.types import HData, ModelConfig


if __name__ == "__main__":
verbose = False
num_workers = 8
num_features = 32
sampling_strategy = SamplingStrategy.HYPEREDGE
metrics = MetricCollection(
{
"auc": BinaryAUROC(),
"avg_precision": BinaryAveragePrecision(),
"precision": BinaryPrecision(),
"recall": BinaryRecall(),
}
)
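    # all four metrics score the binary prediction of whether a candidate hyperedge exists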

print("Loading and preparing dataset...")

dataset = AlgebraDataset(sampling_strategy=sampling_strategy, prepare=True)
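    # (prepare=True is assumed to preprocess/cache the raw data on first use)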
if verbose:
print(f"Dataset:\n {dataset.hdata}\n")

# Split dataset into train and test (80/20)
train_dataset, test_dataset = dataset.split(
ratios=[0.8, 0.2], shuffle=True, seed=42, node_space_setting="transductive"
)

    # Split train into train and val (87.5/12.5 of train = 70/10 of total)
    if verbose:
        print(f"Train dataset (before train/val split):\n {train_dataset.hdata}\n")
    train_dataset, val_dataset = train_dataset.split(
        ratios=[0.875, 0.125], shuffle=True, seed=42, node_space_setting="transductive"
    )
    if verbose:
        print(f"Train dataset (after train/val split):\n {train_dataset.hdata}\n")
        print(f"Val dataset:\n {val_dataset.hdata}\n")
        print(f"Test dataset:\n {test_dataset.hdata}\n")

    # Save the train hyperedge index before negatives are added (meant for
    # CommonNeighbors-style baselines; not used further in this example)
    train_hyperedge_index = train_dataset.hdata.hyperedge_index

# Add negative samples to all splits
for name, ds in [("Train", train_dataset), ("Val", val_dataset), ("Test", test_dataset)]:
num_negative_samples = (
ds.hdata.num_hyperedges
if name in ["Train", "Val"] # 1:1 ratio of pos:neg samples
else int(ds.hdata.num_hyperedges * 0.6) # 60% negatives for test set
)
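        # each negative hyperedge draws as many nodes as the average positive one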
negative_sampler = RandomNegativeSampler(
num_negative_samples=num_negative_samples,
num_nodes_per_sample=int(ds.stats()["avg_degree_hyperedge"]),
)
neg_hdata = negative_sampler.sample(ds.hdata)
combined_hdata = HData.cat_same_node_space([ds.hdata, neg_hdata])
shuffled_hdata = combined_hdata.shuffle(seed=42)
ds_with_negatives = ds.update_from_hdata(shuffled_hdata)

if name == "Train":
train_dataset = ds_with_negatives
elif name == "Val":
val_dataset = ds_with_negatives
else:
test_dataset = ds_with_negatives

if verbose:
print(f"{name} dataset after adding negative samples: {shuffled_hdata}\n")

print("Enriching node features...")

train_dataset.enrich_node_features(
enricher=LaplacianPositionalEncodingEnricher(
num_features=num_features,
            # In the transductive setting, use the total number of nodes so encodings stay
            # consistent across splits: the train dataset contains all nodes, but some may not appear in any hyperedge
num_nodes=train_dataset.hdata.num_nodes,
),
enrichment_mode="replace",
)
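    # Reuse the train encodings for val and test so every split shares one feature
    # space, as the transductive setting requires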
val_dataset.enrich_node_features_from(train_dataset)
test_dataset.enrich_node_features_from(train_dataset)

print("Creating dataloaders...")

train_loader = DataLoader(
train_dataset,
batch_size=128, # or 256
shuffle=False,
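        # the combined data was already shuffled above (seed=42), so keep loader order fixed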
num_workers=num_workers,
persistent_workers=True,
)
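    # Val and test are scored on the full hypergraph in a single pass
    # (sample_full_hypergraph=True), so no batch_size is needed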
val_loader = DataLoader(
val_dataset,
sample_full_hypergraph=True,
shuffle=False,
num_workers=num_workers,
persistent_workers=True,
)
test_loader = DataLoader(
test_dataset,
sample_full_hypergraph=True,
shuffle=False,
num_workers=num_workers,
persistent_workers=True,
)

mean_mlp_module = MLPHlpModule(
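        # a 3-layer MLP encoder; aggregation="mean" presumably pools the node
        # encodings into one vector per candidate hyperedge before scoring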
encoder_config={
"in_channels": num_features,
"out_channels": num_features,
"hidden_channels": 64,
"num_layers": 3,
"drop_rate": 0.3,
},
aggregation="mean",
metrics=metrics,
)

configs = [
ModelConfig(name="mlp", version="mean", model=mean_mlp_module),
]

early_stopping = EarlyStopping(
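        # stop once val_loss has failed to improve for 10 consecutive validation checks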
monitor="val_loss",
patience=10,
mode="min",
)

print("Starting training and evaluation...")

with MultiModelTrainer(
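        # fits each configured model in turn; auto_start_tensorboard / auto_wait are
        # assumed to launch TensorBoard and keep it running after training finishes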
model_configs=configs,
max_epochs=200,
accelerator="auto",
log_every_n_steps=10,
callbacks=[early_stopping],
enable_checkpointing=False,
auto_start_tensorboard=True,
auto_wait=True,
) as trainer:
trainer.fit_all(train_dataloader=train_loader, val_dataloader=val_loader, verbose=True)
trainer.test_all(dataloader=test_loader, verbose=True)

print("Complete!")
examples/hgnn.py (17 additions, 15 deletions)
@@ -5,7 +5,6 @@
     BinaryPrecision,
     BinaryRecall,
 )
-from lightning.pytorch.callbacks import EarlyStopping
 from hyperbench.hlp import HGNNHlpModule
 from hyperbench.nn import LaplacianPositionalEncodingEnricher
 from hyperbench.train import MultiModelTrainer, RandomNegativeSampler
@@ -16,6 +15,7 @@
 if __name__ == "__main__":
     verbose = False
     num_workers = 8
+    num_features = 32
     sampling_strategy = SamplingStrategy.HYPEREDGE
     metrics = MetricCollection(
         {
@@ -33,10 +33,14 @@
print(f"Dataset:\n {dataset.hdata}\n")

# Split dataset into train and test (80/20)
train_dataset, test_dataset = dataset.split(ratios=[0.8, 0.2], shuffle=True, seed=42)
train_dataset, test_dataset = dataset.split(
ratios=[0.8, 0.2], shuffle=True, seed=42, node_space_setting="transductive"
)

# Split train into train and val (87.5/12.5 of train = 70/10 of total)
train_dataset, val_dataset = train_dataset.split(ratios=[0.875, 0.125], shuffle=True, seed=42)
train_dataset, val_dataset = train_dataset.split(
ratios=[0.875, 0.125], shuffle=True, seed=42, node_space_setting="transductive"
)
if verbose:
print(f"Train dataset (before train/val split):\n {train_dataset.hdata}\n")
print(f"Train dataset (after train/val split):\n {train_dataset.hdata}\n")
@@ -75,11 +79,16 @@
print("Enriching node features...")

train_dataset.enrich_node_features(
enricher=LaplacianPositionalEncodingEnricher(num_features=32),
enricher=LaplacianPositionalEncodingEnricher(
num_features=num_features,
# In transductive setting, use total number of nodes to ensure consistent encoding across splits
# as the train dataset contain all nodes but may have no hyperedges where they appear
num_nodes=train_dataset.hdata.num_nodes,
),
enrichment_mode="replace",
)
val_dataset.hdata.x = train_dataset.hdata.x[: val_dataset.hdata.num_nodes]
test_dataset.hdata.x = train_dataset.hdata.x[:, : test_dataset.hdata.num_nodes]
val_dataset.enrich_node_features_from(train_dataset)
test_dataset.enrich_node_features_from(train_dataset)

print("Creating dataloaders...")

@@ -107,15 +116,15 @@

     mean_hgnn_module = HGNNHlpModule(
         encoder_config={
-            "in_channels": 32,
+            "in_channels": num_features,
             "hidden_channels": 16,
             "out_channels": 16,
             "bias": True,
             "use_batch_normalization": False,
             "drop_rate": 0.5,
         },
         aggregation="mean",
-        lr=0.01,
+        lr=0.001,
         weight_decay=5e-4,
         metrics=metrics,
     )
@@ -131,20 +140,13 @@
         ),
     ]

-    early_stopping = EarlyStopping(
-        monitor="val_loss",
-        patience=30,
-        mode="min",
-    )
-
     print("Starting training and evaluation...")

     with MultiModelTrainer(
         model_configs=configs,
         max_epochs=60,
         accelerator="auto",
         log_every_n_steps=1,
-        callbacks=[early_stopping],
         enable_checkpointing=False,
         auto_start_tensorboard=True,
         auto_wait=True,
examples/hgnnp.py (16 additions, 15 deletions)
@@ -5,8 +5,6 @@
     BinaryPrecision,
     BinaryRecall,
 )
-from lightning.pytorch.callbacks import EarlyStopping
-
 from hyperbench.data import AlgebraDataset, DataLoader, SamplingStrategy
 from hyperbench.hlp import HGNNPHlpModule
 from hyperbench.nn import LaplacianPositionalEncodingEnricher
@@ -17,6 +15,7 @@
 if __name__ == "__main__":
     verbose = False
     num_workers = 8
+    num_features = 32
     sampling_strategy = SamplingStrategy.HYPEREDGE
     metrics = MetricCollection(
         {
@@ -33,8 +32,12 @@
     if verbose:
         print(f"Dataset:\n {dataset.hdata}\n")

-    train_dataset, test_dataset = dataset.split(ratios=[0.8, 0.2], shuffle=True, seed=42)
-    train_dataset, val_dataset = train_dataset.split(ratios=[0.875, 0.125], shuffle=True, seed=42)
+    train_dataset, test_dataset = dataset.split(
+        ratios=[0.8, 0.2], shuffle=True, seed=42, node_space_setting="transductive"
+    )
+    train_dataset, val_dataset = train_dataset.split(
+        ratios=[0.875, 0.125], shuffle=True, seed=42, node_space_setting="transductive"
+    )
     if verbose:
         print(f"Train dataset (before train/val split):\n {train_dataset.hdata}\n")
         print(f"Train dataset (after train/val split):\n {train_dataset.hdata}\n")
@@ -71,11 +74,16 @@
print("Enriching node features...")

train_dataset.enrich_node_features(
enricher=LaplacianPositionalEncodingEnricher(num_features=32),
enricher=LaplacianPositionalEncodingEnricher(
num_features=num_features,
# In transductive setting, use total number of nodes to ensure consistent encoding across splits
# as the train dataset contain all nodes but may have no hyperedges where they appear
num_nodes=train_dataset.hdata.num_nodes,
),
enrichment_mode="replace",
)
val_dataset.hdata.x = train_dataset.hdata.x[: val_dataset.hdata.num_nodes]
test_dataset.hdata.x = train_dataset.hdata.x[:, : test_dataset.hdata.num_nodes]
val_dataset.enrich_node_features_from(train_dataset)
test_dataset.enrich_node_features_from(train_dataset)

print("Creating dataloaders...")

@@ -103,7 +111,7 @@

     mean_hgnnp_module = HGNNPHlpModule(
         encoder_config={
-            "in_channels": 32,
+            "in_channels": num_features,
             "hidden_channels": 16,
             "out_channels": 16,
             "bias": True,
@@ -127,20 +135,13 @@
         ),
     ]

-    early_stopping = EarlyStopping(
-        monitor="val_loss",
-        patience=30,
-        mode="min",
-    )
-
     print("Starting training and evaluation...")

     with MultiModelTrainer(
         model_configs=configs,
         max_epochs=60,
         accelerator="auto",
         log_every_n_steps=1,
-        callbacks=[early_stopping],
         enable_checkpointing=False,
         auto_start_tensorboard=True,
         auto_wait=True,