diff --git a/.gitignore b/.gitignore
index 837ff76..164d66b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,4 @@
-venv
+*venv
 venv_pp
 /data/
 __pycache__/
@@ -10,3 +10,8 @@ gridfm_graphkit.egg-info
 mlruns
 *.pt
 .DS_Store
+integrationtests/data_out*
+.julia
+*logs*
+*data_out*
+site*
\ No newline at end of file
diff --git a/docs/datasets/data_modules.md b/docs/datasets/data_modules.md
index bf47118..a5e4dff 100644
--- a/docs/datasets/data_modules.md
+++ b/docs/datasets/data_modules.md
@@ -1,3 +1,3 @@
-# LitGridDataModule
+# LitGridHeteroDataModule
 
-::: gridfm_graphkit.datasets.powergrid_datamodule.LitGridDataModule
+::: gridfm_graphkit.datasets.hetero_powergrid_datamodule.LitGridHeteroDataModule
diff --git a/docs/datasets/data_normalization.md b/docs/datasets/data_normalization.md
index f41334d..1747fd1 100644
--- a/docs/datasets/data_normalization.md
+++ b/docs/datasets/data_normalization.md
@@ -3,12 +3,10 @@
 
 
 Normalization improves neural network training by ensuring features are well-scaled, preventing issues like exploding gradients and slow convergence. In power grids, where variables like voltage and power span wide ranges, normalization is essential.
-The `gridfm-graphkit` package offers four methods:
+The `gridfm-graphkit` package offers normalization methods based on the per-unit (p.u.) system:
 
-- [`Min-Max Normalization`](#minmaxnormalizer)
-- [`Standardization (Z-score)`](#standardizer)
-- [`Identity (no normalization)`](#identitynormalizer)
-- [`BaseMVA Normalization`](#basemvanormalizer)
+- [`BaseMVA Normalization`](#heterodatamvanormalizer)
+- [`Per-Sample BaseMVA Normalization`](#heterodatapersamplemvanormalizer)
 
 Each of these strategies implements a unified interface and can be used interchangeably depending on the learning task and data characteristics.
 
@@ -25,27 +23,15 @@ Each of these strategies implements a unified interface and can be used intercha
 
 ---
 
-### `MinMaxNormalizer`
+### `HeteroDataMVANormalizer`
 
-::: gridfm_graphkit.datasets.normalizers.MinMaxNormalizer
+::: gridfm_graphkit.datasets.normalizers.HeteroDataMVANormalizer
 
 ---
 
-### `Standardizer`
+### `HeteroDataPerSampleMVANormalizer`
 
-::: gridfm_graphkit.datasets.normalizers.Standardizer
-
----
-
-### `BaseMVANormalizer`
-
-::: gridfm_graphkit.datasets.normalizers.BaseMVANormalizer
-
----
-
-### `IdentityNormalizer`
-
-::: gridfm_graphkit.datasets.normalizers.IdentityNormalizer
+::: gridfm_graphkit.datasets.normalizers.HeteroDataPerSampleMVANormalizer
 
 ---
 
@@ -54,13 +40,18 @@ Each of these strategies implements a unified interface and can be used intercha
 Example:
 
 ```python
-from gridfm_graphkit.datasets.normalizers import MinMaxNormalizer
-import torch
+from gridfm_graphkit.datasets.normalizers import HeteroDataMVANormalizer
+from torch_geometric.data import HeteroData
+
+# Create normalizer
+normalizer = HeteroDataMVANormalizer(args)
+
+# Fit on training data
+params = normalizer.fit(data_path, scenario_ids)
 
-data = torch.randn(100, 5)  # Example tensor
+# Transform data
+normalizer.transform(hetero_data)
 
-normalizer = MinMaxNormalizer(node_data=True,args=None)
-params = normalizer.fit(data)
-normalized = normalizer.transform(data)
-restored = normalizer.inverse_transform(normalized)
+# Inverse transform to restore original scale
+normalizer.inverse_transform(hetero_data)
 ```
diff --git a/docs/datasets/powergrid.md b/docs/datasets/powergrid.md
index 45476ac..1f983a5 100644
--- a/docs/datasets/powergrid.md
+++ b/docs/datasets/powergrid.md
@@ -1,3 +1,3 @@
-## `GridDatasetDisk`
+## `HeteroGridDatasetDisk`
 
-::: gridfm_graphkit.datasets.powergrid_dataset.GridDatasetDisk
+::: gridfm_graphkit.datasets.powergrid_hetero_dataset.HeteroGridDatasetDisk
diff --git a/docs/datasets/transforms.md b/docs/datasets/transforms.md
index dd7f66d..0dcf981 100644
--- a/docs/datasets/transforms.md
+++ b/docs/datasets/transforms.md
@@ -2,26 +2,18 @@
 
 > Each transformation class inherits from [`BaseTransform`](https://pytorch-geometric.readthedocs.io/en/latest/modules/transforms.html#torch_geometric.transforms.BaseTransform) provided by [PyTorch Geometric](https://pytorch-geometric.readthedocs.io/).
 
-### `AddNormalizedRandomWalkPE`
+### `RemoveInactiveGenerators`
 
-::: gridfm_graphkit.datasets.transforms.AddNormalizedRandomWalkPE
+::: gridfm_graphkit.datasets.transforms.RemoveInactiveGenerators
 
-### `AddEdgeWeights`
+### `RemoveInactiveBranches`
 
-::: gridfm_graphkit.datasets.transforms.AddEdgeWeights
+::: gridfm_graphkit.datasets.transforms.RemoveInactiveBranches
 
-### `AddIdentityMask`
+### `ApplyMasking`
 
-::: gridfm_graphkit.datasets.transforms.AddIdentityMask
+::: gridfm_graphkit.datasets.transforms.ApplyMasking
 
-### `AddRandomMask`
+### `LoadGridParamsFromPath`
 
-::: gridfm_graphkit.datasets.transforms.AddRandomMask
-
-### `AddPFMask`
-
-::: gridfm_graphkit.datasets.transforms.AddPFMask
-
-### `AddOPFMask`
-
-::: gridfm_graphkit.datasets.transforms.AddOPFMask
+::: gridfm_graphkit.datasets.transforms.LoadGridParamsFromPath
diff --git a/docs/index.md b/docs/index.md
index e843631..e38d000 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -14,4 +14,4 @@ This library is brought to you by the GridFM team to train, finetune and interac
 
 
 
-## Citation: TBD
+## Citation: TBD
\ No newline at end of file
diff --git a/docs/install/installation.md b/docs/install/installation.md
index 07dc502..89ee4be 100644
--- a/docs/install/installation.md
+++ b/docs/install/installation.md
@@ -1,14 +1,18 @@
+# Installation
+
 You can install `gridfm-graphkit` directly from PyPI:
 
 ```bash
 pip install gridfm-graphkit
 ```
 
+For GPU support and compatibility with PyTorch Geometric's scatter operations, install PyTorch (and optionally CUDA) first, then install the matching `torch-scatter` wheel. See [PyTorch and torch-scatter](#pytorch-and-torch-scatter-optional) below.
+
 ---
 
 ## Development Setup
 
-To contribute or develop locally, clone the repository and install in editable mode:
+To contribute or develop locally, clone the repository and install in editable mode. Use Python 3.10, 3.11, or 3.12 (3.12 is recommended).
 
 ```bash
 git clone git@github.com:gridfm/gridfm-graphkit.git
@@ -18,6 +22,26 @@ source venv/bin/activate
 pip install -e .
 ```
 
+### PyTorch and torch-scatter (optional)
+
+If you need GPU acceleration or PyTorch Geometric scatter ops (used by the library), install PyTorch and the matching `torch-scatter` wheel:
+
+1. Install PyTorch (see [pytorch.org](https://pytorch.org/) for your platform and CUDA version).
+
+2. Get your Torch + CUDA version string:
+   ```bash
+   TORCH_CUDA_VERSION=$(python -c "import torch; print(torch.__version__ + ('+cpu' if torch.version.cuda is None and '+' not in torch.__version__ else ''))")
+   ```
+
+3. Install the correct `torch-scatter` wheel:
+   ```bash
+   pip install torch-scatter -f https://data.pyg.org/whl/torch-${TORCH_CUDA_VERSION}.html
+   ```
+
+---
+
+## Optional extras
+
 For documentation generation and unit testing, install with the optional `dev` and `test` extras:
 
 ```bash
diff --git a/docs/models/models.md b/docs/models/models.md
index 9e822ca..7c8c5c6 100644
--- a/docs/models/models.md
+++ b/docs/models/models.md
@@ -1,10 +1,37 @@
 # Models
 
-### `GPSTransformer`
+### `GNS_heterogeneous`
 
-::: gridfm_graphkit.models.gps_transformer.GPSTransformer
+::: gridfm_graphkit.models.gnn_heterogeneous_gns.GNS_heterogeneous
 
+---
 
-### `GNN_TransformerConv`
+## Physics Decoders
 
-::: gridfm_graphkit.models.gnn_transformer.GNN_TransformerConv
+### `PhysicsDecoderOPF`
+
+::: gridfm_graphkit.models.utils.PhysicsDecoderOPF
+
+### `PhysicsDecoderPF`
+
+::: gridfm_graphkit.models.utils.PhysicsDecoderPF
+
+### `PhysicsDecoderSE`
+
+::: gridfm_graphkit.models.utils.PhysicsDecoderSE
+
+---
+
+## Utility Modules
+
+### `ComputeBranchFlow`
+
+::: gridfm_graphkit.models.utils.ComputeBranchFlow
+
+### `ComputeNodeInjection`
+
+::: gridfm_graphkit.models.utils.ComputeNodeInjection
+
+### `ComputeNodeResiduals`
+
+::: gridfm_graphkit.models.utils.ComputeNodeResiduals
diff --git a/docs/tasks/base_task.md b/docs/tasks/base_task.md
new file mode 100644
index 0000000..a0acbae
--- /dev/null
+++ b/docs/tasks/base_task.md
@@ -0,0 +1,216 @@
+# Base Task
+
+The `BaseTask` class is an abstract base class that provides the foundation for all task implementations in GridFM-GraphKit. It extends PyTorch Lightning's `LightningModule` and defines the common interface and shared functionality for training, validation, and testing.
+
+## Overview
+
+`BaseTask` serves as the parent class for all task-specific implementations, providing:
+
+- **Abstract method definitions**: Enforces implementation of core methods in subclasses
+- **Optimizer configuration**: Sets up AdamW optimizer with learning rate scheduling
+- **Normalization statistics logging**: Saves normalization parameters for reproducibility
+- **Hyperparameter management**: Automatically saves hyperparameters for experiment tracking
+
+## BaseTask Class
+
+::: gridfm_graphkit.tasks.base_task.BaseTask
+    options:
+      show_root_heading: true
+      show_source: true
+      members:
+        - __init__
+        - forward
+        - training_step
+        - validation_step
+        - test_step
+        - predict_step
+        - on_fit_start
+        - configure_optimizers
+
+## Methods
+
+### `__init__(args, data_normalizers)`
+
+Initialize the base task with configuration and normalizers.
+
+**Parameters:**
+
+- `args` (NestedNamespace): Experiment configuration containing all hyperparameters
+- `data_normalizers` (list): List of normalizer objects, one per dataset
+
+**Attributes Set:**
+
+- `self.args`: Stores the configuration
+- `self.data_normalizers`: Stores the normalizers
+- Automatically calls `save_hyperparameters()` for experiment tracking
+
+---
+
+### `forward(*args, **kwargs)` (Abstract)
+
+Defines the forward pass through the model. Must be implemented by subclasses.
+
+**Returns:**
+
+- Model output (structure depends on task implementation)
+
+---
+
+### `training_step(batch)` (Abstract)
+
+Executes one training step. Must be implemented by subclasses.
+
+**Parameters:**
+
+- `batch`: A batch of data from the training dataloader
+
+**Returns:**
+
+- Loss tensor for backpropagation
+
+---
+
+### `validation_step(batch, batch_idx)` (Abstract)
+
+Executes one validation step. Must be implemented by subclasses.
+
+**Parameters:**
+
+- `batch`: A batch of data from the validation dataloader
+- `batch_idx` (int): Index of the current batch
+
+**Returns:**
+
+- Loss tensor or metrics dictionary
+
+---
+
+### `test_step(batch, batch_idx, dataloader_idx=0)` (Abstract)
+
+Executes one test step. Must be implemented by subclasses.
+
+**Parameters:**
+
+- `batch`: A batch of data from the test dataloader
+- `batch_idx` (int): Index of the current batch
+- `dataloader_idx` (int): Index of the dataloader (for multiple test datasets)
+
+**Returns:**
+
+- Metrics dictionary or None
+
+---
+
+### `predict_step(batch, batch_idx, dataloader_idx=0)` (Abstract)
+
+Executes one prediction step. Must be implemented by subclasses.
+
+**Parameters:**
+
+- `batch`: A batch of data from the prediction dataloader
+- `batch_idx` (int): Index of the current batch
+- `dataloader_idx` (int): Index of the dataloader
+
+**Returns:**
+
+- Predictions dictionary
+
+---
+
+### `on_fit_start()`
+
+Called at the beginning of training. Saves normalization statistics to disk.
+
+**Behavior:**
+
+- Creates a `stats` directory in the logging directory
+- Saves human-readable normalization statistics to `normalization_stats.txt`
+- Saves machine-loadable statistics to `normalizer_stats.pt` (PyTorch format)
+- Only executes on rank 0 in distributed training (via `@rank_zero_only` decorator)
+
+**Output Files:**
+
+1. **`normalization_stats.txt`**: Human-readable text file with statistics for each dataset
+2. **`normalizer_stats.pt`**: PyTorch file containing a dictionary keyed by network name
+
+---
+
+### `configure_optimizers()`
+
+Configures the optimizer and learning rate scheduler.
+
+**Optimizer:**
+
+- **Type**: AdamW
+- **Learning Rate**: From `args.optimizer.learning_rate`
+- **Betas**: From `args.optimizer.beta1` and `args.optimizer.beta2`
+
+**Scheduler:**
+
+- **Type**: ReduceLROnPlateau
+- **Mode**: Minimize
+- **Factor**: From `args.optimizer.lr_decay`
+- **Patience**: From `args.optimizer.lr_patience`
+- **Monitored Metric**: "Validation loss"
+
+**Returns:**
+
+- Dictionary with optimizer and lr_scheduler configuration
+
+## Usage
+
+`BaseTask` is not used directly. Instead, create a subclass that implements all abstract methods:
+
+```python
+from gridfm_graphkit.tasks.base_task import BaseTask
+
+class MyCustomTask(BaseTask):
+    def __init__(self, args, data_normalizers):
+        super().__init__(args, data_normalizers)
+        # Initialize task-specific components
+        
+    def forward(self, x_dict, edge_index_dict, edge_attr_dict, mask_dict):
+        # Implement forward pass
+        pass
+        
+    def training_step(self, batch):
+        # Implement training logic
+        pass
+        
+    def validation_step(self, batch, batch_idx):
+        # Implement validation logic
+        pass
+        
+    def test_step(self, batch, batch_idx, dataloader_idx=0):
+        # Implement test logic
+        pass
+        
+    def predict_step(self, batch, batch_idx, dataloader_idx=0):
+        # Implement prediction logic
+        pass
+```
+
+## Configuration Example
+
+The base task uses the following configuration sections:
+
+```yaml
+optimizer:
+  learning_rate: 0.001
+  beta1: 0.9
+  beta2: 0.999
+  lr_decay: 0.7
+  lr_patience: 5
+
+data:
+  networks:
+    - case14_ieee
+    - case118_ieee
+```
+
+## Related
+
+- [Reconstruction Task](reconstruction_task.md): Base class for reconstruction tasks
+- [Power Flow Task](power_flow.md): Concrete implementation for power flow
+- [Optimal Power Flow Task](optimal_power_flow.md): Concrete implementation for OPF
+- [State Estimation Task](state_estimation.md): Concrete implementation for state estimation
\ No newline at end of file
diff --git a/docs/tasks/feature_reconstruction.md b/docs/tasks/feature_reconstruction.md
index 39e0823..6356c82 100644
--- a/docs/tasks/feature_reconstruction.md
+++ b/docs/tasks/feature_reconstruction.md
@@ -1,3 +1,185 @@
-# Feature Reconstruction Task
+# Task Classes Overview
 
-::: gridfm_graphkit.tasks.feature_reconstruction_task.FeatureReconstructionTask
+GridFM-GraphKit provides a hierarchical task system for power grid analysis. All tasks inherit from a common base class and share core functionality while implementing domain-specific logic.
+
+## Task Hierarchy
+
+```
+BaseTask (Abstract)
+    └── ReconstructionTask
+            ├── PowerFlowTask
+            ├── OptimalPowerFlowTask
+            └── StateEstimationTask
+```
+
+## Available Task Classes
+
+### Base Classes
+
+- **[BaseTask](base_task.md)**: Abstract base class providing common functionality for all tasks
+    - Optimizer configuration
+    - Learning rate scheduling
+    - Normalization statistics logging
+    - Abstract method definitions
+
+- **[ReconstructionTask](reconstruction_task.md)**: Base class for feature reconstruction tasks
+    - Model integration
+    - Loss function handling
+    - Shared training/validation logic
+    - Test output management
+
+### Concrete Task Implementations
+
+- **[PowerFlowTask](power_flow.md)**: Power flow analysis
+    - Computes voltage profiles and power flows
+    - Physics-based validation with Power Balance Error (PBE)
+    - Separate metrics for PQ, PV, and REF buses
+    - Detailed per-bus predictions
+
+- **[OptimalPowerFlowTask](optimal_power_flow.md)**: Optimal power flow with economic optimization
+    - Minimizes generation costs
+    - Tracks optimality gap
+    - Monitors constraint violations (thermal, voltage, angle)
+    - Evaluates reactive power limits
+
+- **[StateEstimationTask](state_estimation.md)**: State estimation from noisy measurements
+    - Handles measurement noise and outliers
+    - Separate evaluation for outliers, masked values, and clean measurements
+    - Correlation analysis between predictions, measurements, and targets
+
+## Quick Reference
+
+### Method Overview
+
+All task classes implement the following core methods (`ReconstructionTask+` means `ReconstructionTask` and every task that inherits from it):
+
+| Method | Purpose | Implemented In |
+|--------|---------|----------------|
+| `__init__` | Initialize task with config and normalizers | All classes |
+| `forward` | Forward pass through model | ReconstructionTask+ |
+| `training_step` | Execute one training step | ReconstructionTask+ |
+| `validation_step` | Execute one validation step | ReconstructionTask+ |
+| `test_step` | Execute one test step | Concrete tasks |
+| `predict_step` | Execute one prediction step | Concrete tasks |
+| `on_fit_start` | Save normalization stats before training | BaseTask |
+| `on_test_end` | Generate reports and plots after testing | Concrete tasks |
+| `configure_optimizers` | Setup optimizer and scheduler | BaseTask |
+
+### Task Selection
+
+Tasks are automatically selected based on your YAML configuration:
+
+```yaml
+task:
+  task_name: PowerFlow  # or OptimalPowerFlow, StateEstimation
+```
+
+The task registry automatically instantiates the correct task class based on the `task_name` field.
+
+## Common Features
+
+All tasks share these features:
+
+### 1. Distributed Training Support
+- Multi-GPU training with proper metric synchronization
+- Rank 0 handles logging and file I/O
+- Automatic gathering of test outputs across ranks
+
+### 2. Comprehensive Logging
+- Training and validation metrics logged to MLflow or TensorBoard
+- Automatic hyperparameter tracking
+- Normalization statistics saved for reproducibility
+
+### 3. Test Outputs
+- CSV reports with detailed metrics
+- Visualization plots (when `verbose=True`)
+- Per-dataset analysis for multiple test sets
+
+### 4. Physics-Based Evaluation
+- Power balance error computation
+- Branch flow calculations
+- Residual analysis by bus type
+
+## Configuration
+
+### Basic Configuration
+
+```yaml
+task:
+  task_name: PowerFlow
+  verbose: true
+
+training:
+  batch_size: 64
+  epochs: 100
+  losses: ["MaskedMSE", "PBE"]
+  loss_weights: [0.01, 0.99]
+
+optimizer:
+  learning_rate: 0.001
+  beta1: 0.9
+  beta2: 0.999
+  lr_decay: 0.7
+  lr_patience: 5
+```
+
+### Task-Specific Options
+
+Each task may have additional configuration options. See the individual task documentation for details:
+
+- [Power Flow Configuration](power_flow.md#configuration-example)
+- [Optimal Power Flow Configuration](optimal_power_flow.md#configuration-example)
+- [State Estimation Configuration](state_estimation.md#configuration-example)
+
+## Creating Custom Tasks
+
+To create a custom task, extend `ReconstructionTask` or `BaseTask`:
+
+```python
+from gridfm_graphkit.tasks.reconstruction_tasks import ReconstructionTask
+from gridfm_graphkit.io.registries import TASK_REGISTRY
+
+@TASK_REGISTRY.register("MyCustomTask")
+class MyCustomTask(ReconstructionTask):
+    def __init__(self, args, data_normalizers):
+        super().__init__(args, data_normalizers)
+        # Add custom initialization
+    
+    def test_step(self, batch, batch_idx, dataloader_idx=0):
+        # Implement custom test logic
+        output, loss_dict = self.shared_step(batch)
+        
+        # Add custom metrics
+        custom_metric = self.compute_custom_metric(output, batch)
+        loss_dict["Custom Metric"] = custom_metric
+        
+        dataset_name = self.args.data.networks[dataloader_idx]  # log metrics under the network's name
+        for metric, value in loss_dict.items():
+            self.log(f"{dataset_name}/{metric}", value)
+        
+        return loss_dict["loss"]
+    
+    def predict_step(self, batch, batch_idx, dataloader_idx=0):
+        # Implement custom prediction logic
+        output, _ = self.shared_step(batch)
+        return {"predictions": output}
+    
+    def on_test_end(self):
+        # Custom analysis and visualization
+        # Generate reports, plots, etc.
+        super().on_test_end()
+```
+
+Then use it in your configuration:
+
+```yaml
+task:
+  task_name: MyCustomTask
+```
+
+## Related Documentation
+
+- [Loss Functions](../training/loss.md): Available loss functions and their configuration
+- [Data Modules](../datasets/data_modules.md): Data loading and preprocessing
+- [Models](../models/models.md): Available model architectures
+- [Quick Start Guide](../quick_start/quick_start.md): Getting started with training
diff --git a/docs/tasks/optimal_power_flow.md b/docs/tasks/optimal_power_flow.md
new file mode 100644
index 0000000..3d13a57
--- /dev/null
+++ b/docs/tasks/optimal_power_flow.md
@@ -0,0 +1,12 @@
+# Optimal Power Flow Task
+
+## OptimalPowerFlowTask Class
+
+::: gridfm_graphkit.tasks.opf_task.OptimalPowerFlowTask
+    options:
+      show_root_heading: true
+      show_source: true
+      members:
+        - __init__
+        - test_step
+        - on_test_end
diff --git a/docs/tasks/power_flow.md b/docs/tasks/power_flow.md
new file mode 100644
index 0000000..8912a26
--- /dev/null
+++ b/docs/tasks/power_flow.md
@@ -0,0 +1,12 @@
+# Power Flow Task
+
+## PowerFlowTask Class
+
+::: gridfm_graphkit.tasks.pf_task.PowerFlowTask
+    options:
+      show_root_heading: true
+      show_source: true
+      members:
+        - __init__
+        - test_step
+        - on_test_end
diff --git a/docs/tasks/reconstruction_task.md b/docs/tasks/reconstruction_task.md
new file mode 100644
index 0000000..28bd8aa
--- /dev/null
+++ b/docs/tasks/reconstruction_task.md
@@ -0,0 +1,293 @@
+# Reconstruction Task
+
+The `ReconstructionTask` class is a concrete implementation of `BaseTask` that provides the foundation for node feature reconstruction on power grid graphs. It wraps a GridFM model and defines the training, validation, and testing logic for reconstructing masked node features.
+
+## Overview
+
+`ReconstructionTask` serves as the base class for all reconstruction-based tasks in GridFM-GraphKit, including:
+
+- Power Flow (PF)
+- Optimal Power Flow (OPF)
+- State Estimation (SE)
+
+It provides:
+
+- **Model integration**: Loads and wraps the GridFM model
+- **Loss function handling**: Configures and applies loss functions
+- **Shared training logic**: Common training and validation steps
+- **Test output management**: Collects and manages test outputs for analysis
+
+## ReconstructionTask Class
+
+::: gridfm_graphkit.tasks.reconstruction_tasks.ReconstructionTask
+    options:
+      show_root_heading: true
+      show_source: true
+      members:
+        - __init__
+        - forward
+        - shared_step
+        - training_step
+        - validation_step
+        - on_test_end
+
+## Methods
+
+### `__init__(args, data_normalizers)`
+
+Initialize the reconstruction task with model, loss function, and configuration.
+
+**Parameters:**
+
+- `args` (NestedNamespace): Experiment configuration with fields like:
+    - `training.batch_size`: Batch size for training
+    - `optimizer.*`: Optimizer configuration
+    - `model.*`: Model architecture configuration
+    - `training.losses`: List of loss functions to use
+    - `data.networks`: List of network names
+- `data_normalizers` (list): One normalizer per dataset for feature normalization/denormalization
+
+**Attributes Set:**
+
+- `self.model`: GridFM model loaded via `load_model()`
+- `self.loss_fn`: Loss function resolved from configuration via `get_loss_function()`
+- `self.batch_size`: Training batch size
+- `self.test_outputs`: Dictionary to store test outputs per dataset (keyed by dataloader index)
+
+**Example:**
+
+```python
+task = ReconstructionTask(args, data_normalizers)
+```
+
+---
+
+### `forward(x_dict, edge_index_dict, edge_attr_dict, mask_dict)`
+
+Forward pass through the model.
+
+**Parameters:**
+
+- `x_dict` (dict): Node features dictionary with keys like `"bus"`, `"gen"`
+- `edge_index_dict` (dict): Edge indices dictionary for heterogeneous edges
+- `edge_attr_dict` (dict): Edge attributes dictionary
+- `mask_dict` (dict): Masking dictionary indicating which features are masked
+
+**Returns:**
+
+- Model output dictionary with predicted node features
+
+**Example:**
+
+```python
+output = task.forward(
+    x_dict=batch.x_dict,
+    edge_index_dict=batch.edge_index_dict,
+    edge_attr_dict=batch.edge_attr_dict,
+    mask_dict=batch.mask_dict
+)
+```
+
+---
+
+### `shared_step(batch)`
+
+Common logic for training and validation steps.
+
+**Parameters:**
+
+- `batch`: A batch from the dataloader containing:
+    - `x_dict`: Input node features
+    - `y_dict`: Target node features
+    - `edge_index_dict`: Edge connectivity
+    - `edge_attr_dict`: Edge attributes
+    - `mask_dict`: Feature masks
+
+**Returns:**
+
+- `output` (dict): Model predictions
+- `loss_dict` (dict): Dictionary containing:
+    - `"loss"`: Total loss value
+    - Additional loss components (if applicable)
+
+**Behavior:**
+
+1. Performs forward pass through the model
+2. Computes loss using the configured loss function
+3. Returns both predictions and loss dictionary
+
+**Example:**
+
+```python
+output, loss_dict = task.shared_step(batch)
+total_loss = loss_dict["loss"]
+```
+
+---
+
+### `training_step(batch)`
+
+Execute one training step.
+
+**Parameters:**
+
+- `batch`: Training batch from dataloader
+
+**Returns:**
+
+- Loss tensor for backpropagation
+
+**Logged Metrics:**
+
+- `"Training Loss"`: Total training loss
+- `"Learning Rate"`: Current learning rate
+
+**Logging Configuration:**
+
+- `batch_size`: Number of graphs in batch
+- `sync_dist=False`: No synchronization across GPUs during training
+- `on_epoch=False`: Log per step, not per epoch
+- `on_step=True`: Log at each training step
+- `prog_bar=False`: Don't show in progress bar
+- `logger=True`: Send to logger (e.g., MLflow)
+
+---
+
+### `validation_step(batch, batch_idx)`
+
+Execute one validation step.
+
+**Parameters:**
+
+- `batch`: Validation batch from dataloader
+- `batch_idx` (int): Index of the current batch
+
+**Returns:**
+
+- Loss tensor
+
+**Logged Metrics:**
+
+- `"Validation loss"`: Total validation loss
+- Additional loss components (if multiple losses are used)
+
+**Logging Configuration:**
+
+- `batch_size`: Number of graphs in batch
+- `sync_dist=True`: Synchronize metrics across GPUs
+- `on_epoch=True`: Aggregate and log at epoch end
+- `on_step=False`: Don't log individual steps
+- `logger=True`: Send to logger
+
+**Note:** The validation loss is monitored by the learning rate scheduler for automatic learning rate reduction.
+
+---
+
+### `on_test_end()`
+
+Called at the end of testing. Clears stored test outputs.
+
+**Behavior:**
+
+- Clears the `self.test_outputs` dictionary
+- Only executes on rank 0 in distributed training (via `@rank_zero_only` decorator)
+- Subclasses typically override this to add custom analysis, plotting, and CSV generation
+
+**Note:** This is a minimal implementation. Task-specific subclasses (PowerFlowTask, OptimalPowerFlowTask, StateEstimationTask) override this method to:
+
+- Generate detailed metrics CSV files
+- Create visualization plots
+- Save analysis results
+
+---
+
+## Usage
+
+`ReconstructionTask` can be used directly for simple reconstruction tasks, but is typically subclassed for specific power system tasks:
+
+```python
+from gridfm_graphkit.tasks.reconstruction_tasks import ReconstructionTask
+
+# Direct usage (simple reconstruction)
+task = ReconstructionTask(args, data_normalizers)
+
+# Or create a subclass for custom behavior
+class CustomReconstructionTask(ReconstructionTask):
+    def test_step(self, batch, batch_idx, dataloader_idx=0):
+        # Custom test logic
+        output, loss_dict = self.shared_step(batch)
+        # Add custom metrics
+        return loss_dict["loss"]
+    
+    def on_test_end(self):
+        # Custom analysis and visualization
+        super().on_test_end()
+```
+
+## Configuration Example
+
+```yaml
+task:
+  task_name: Reconstruction  # Or PowerFlow, OptimalPowerFlow, StateEstimation
+
+model:
+  type: GNS_heterogeneous
+  hidden_size: 48
+  num_layers: 12
+  attention_head: 8
+
+training:
+  batch_size: 64
+  epochs: 100
+  losses:
+    - MaskedMSE
+  loss_weights:
+    - 1.0
+
+optimizer:
+  learning_rate: 0.001
+  beta1: 0.9
+  beta2: 0.999
+  lr_decay: 0.7
+  lr_patience: 5
+```
+
+## Loss Functions
+
+The reconstruction task supports various loss functions configured via the YAML file:
+
+- **MaskedMSE**: Mean squared error on masked features only
+- **MaskedBusMSE**: MSE specifically for bus node features
+- **LayeredWeightedPhysics**: Physics-based loss with layer-wise weighting
+- **PBE**: Power Balance Error loss
+
+Multiple losses can be combined with weights:
+
+```yaml
+training:
+  losses:
+    - LayeredWeightedPhysics
+    - MaskedBusMSE
+  loss_weights:
+    - 0.1
+    - 0.9
+  loss_args:
+    - base_weight: 0.5
+    - {}
+```
+
+## Subclasses
+
+The following task classes extend `ReconstructionTask`:
+
+- **[PowerFlowTask](power_flow.md)**: Adds power flow-specific metrics and physics validation
+- **[OptimalPowerFlowTask](optimal_power_flow.md)**: Adds economic optimization metrics and constraint violation tracking
+- **[StateEstimationTask](state_estimation.md)**: Adds measurement-based estimation and outlier handling
+
+## Related
+
+- [Base Task](base_task.md): Abstract base class for all tasks
+- [Power Flow Task](power_flow.md): Power flow analysis implementation
+- [Optimal Power Flow Task](optimal_power_flow.md): OPF optimization implementation
+- [State Estimation Task](state_estimation.md): State estimation implementation
+- [Loss Functions](../training/loss.md): Available loss functions
\ No newline at end of file
diff --git a/docs/tasks/state_estimation.md b/docs/tasks/state_estimation.md
new file mode 100644
index 0000000..c3adbbc
--- /dev/null
+++ b/docs/tasks/state_estimation.md
@@ -0,0 +1,11 @@
+# State Estimation Task
+
+::: gridfm_graphkit.tasks.se_task.StateEstimationTask
+    options:
+      show_root_heading: true
+      show_source: true
+      members:
+        - __init__
+        - test_step
+        - on_test_end
+        - predict_step
diff --git a/docs/training/loss.md b/docs/training/loss.md
index 5cde707..de56d4b 100644
--- a/docs/training/loss.md
+++ b/docs/training/loss.md
@@ -1,49 +1,47 @@
 # Loss Functions
 
-### `Power Balance Equation Loss`
+## Base Loss
 
-$$
-\mathcal{L}_{\text{PBE}} = \frac{1}{N} \sum_{i=1}^N \left| (P_{G,i} - P_{D,i}) + j(Q_{G,i} - Q_{D,i}) - S_{\text{injection}, i} \right|
-$$
-
-::: gridfm_graphkit.training.loss.PBELoss
+::: gridfm_graphkit.training.loss.BaseLoss
 
 ---
 
-### `Mean Squared Error Loss`
-
-$$
-\mathcal{L}_{\text{MSE}} = \frac{1}{N} \sum_{i=1}^N (y_i - \hat{y}_i)^2
-$$
+## Mean Squared Error Loss
 
 ::: gridfm_graphkit.training.loss.MSELoss
 
 ---
 
-### `Masked Mean Squared Error Loss`
-
-$$
-\mathcal{L}_{\text{MaskedMSE}} = \frac{1}{|M|} \sum_{i \in M} (y_i - \hat{y}_i)^2
-$$
+## Masked Mean Squared Error Loss
 
 ::: gridfm_graphkit.training.loss.MaskedMSELoss
 
 ---
 
-### `Scaled Cosine Error Loss`
-
-$$
-\mathcal{L}_{\text{SCE}} = \frac{1}{N} \sum_{i=1}^N \left(1 - \frac{\hat{y}^T_i \cdot y_i}{\|\hat{y}_i\| \|y_i\|}\right)^\alpha \text{ , } \alpha \geq 1
-$$
+## Masked Generator MSE Loss
 
-::: gridfm_graphkit.training.loss.SCELoss
+::: gridfm_graphkit.training.loss.MaskedGenMSE
 
 ---
 
-### `Mixed Loss`
+## Masked Bus MSE Loss
 
-$$
-\mathcal{L}_{\text{Mixed}} = \sum_{m=1}^M w_m \cdot \mathcal{L}_m
-$$
+::: gridfm_graphkit.training.loss.MaskedBusMSE
+
+---
+
+## Mixed Loss
 
 ::: gridfm_graphkit.training.loss.MixedLoss
+
+---
+
+## Layered Weighted Physics Loss
+
+::: gridfm_graphkit.training.loss.LayeredWeightedPhysicsLoss
+
+---
+
+## Loss Per Dimension
+
+::: gridfm_graphkit.training.loss.LossPerDim
diff --git a/gridfm_graphkit/tasks/opf_task.py b/gridfm_graphkit/tasks/opf_task.py
index b28c5a0..06d938d 100644
--- a/gridfm_graphkit/tasks/opf_task.py
+++ b/gridfm_graphkit/tasks/opf_task.py
@@ -256,8 +256,8 @@ def test_step(self, batch, batch_idx, dataloader_idx=0):
         loss_dict["Opt gap"] = optimality_gap
         loss_dict["MSE PG"] = mse_PG[PG_H]
 
-        loss_dict["Branch termal violation from"] = mean_thermal_violation_forward
-        loss_dict["Branch termal violation to"] = mean_thermal_violation_reverse
+        loss_dict["Branch thermal violation from"] = mean_thermal_violation_forward
+        loss_dict["Branch thermal violation to"] = mean_thermal_violation_reverse
         loss_dict["Branch voltage angle difference violations"] = (
             branch_angle_violation_mean
         )
diff --git a/integrationtests/test_base_set.py b/integrationtests/test_base_set.py
new file mode 100644
index 0000000..e33e11c
--- /dev/null
+++ b/integrationtests/test_base_set.py
@@ -0,0 +1,167 @@
+import pytest
+import subprocess
+import os
+import glob
+import pandas as pd
+import yaml
+import urllib.request
+import shutil
+
+
+def execute_and_live_output(cmd) -> None:
+    """Run ``cmd`` through the shell without capturing its output.
+
+    Raises ``subprocess.CalledProcessError`` when the command exits non-zero.
+    """
+    # Nothing is captured, so the child process inherits this process's streams.
+    subprocess.run(cmd, text=True, shell=True, check=True)
+
+
+def prepare_config():
+    """
+    Download default.yaml from gridfm-datakit repo and modify it with test parameters.
+
+    Returns the path of the written config file. A failed or timed-out
+    download propagates the exception raised by ``urlopen``.
+    """
+    config_url = "https://raw.githubusercontent.com/gridfm/gridfm-datakit/refs/heads/main/scripts/config/default.yaml"
+    config_path = "integrationtests/default.yaml"
+
+    print(f"Downloading config from {config_url}...")
+    # Bound the request so an unreachable host fails the test instead of hanging it.
+    with urllib.request.urlopen(config_url, timeout=60) as response:
+        config = yaml.safe_load(response.read().decode("utf-8"))
+
+    config["network"]["name"] = "case14_ieee"
+    config["load"]["scenarios"] = 10000
+    config["topology_perturbation"]["n_topology_variants"] = 2
+
+    with open(config_path, "w", encoding="utf-8") as f:
+        yaml.dump(config, f, default_flow_style=False, sort_keys=False)
+
+    print(f"Config prepared at {config_path} with:")
+    print(f"  - network.name: {config['network']['name']}")
+    print(f"  - load.scenarios: {config['load']['scenarios']}")
+    variants = config["topology_perturbation"]["n_topology_variants"]
+    print(f"  - topology_perturbation.n_topology_variants: {variants}")
+
+    return config_path
+
+
+def prepare_training_config():
+    """
+    Rewrite the case14 example training config in place with ``training.epochs = 2``.
+
+    Returns the path of the rewritten config file.
+    """
+    config_path = "examples/config/HGNS_PF_datakit_case14.yaml"
+
+    with open(config_path, "r") as src:
+        settings = yaml.safe_load(src)
+
+    # Create the "training" section on demand, then shorten the run.
+    training = settings.setdefault("training", {})
+    training["epochs"] = 2
+
+    with open(config_path, "w") as dst:
+        yaml.dump(settings, dst, default_flow_style=False, sort_keys=False)
+
+    print(f"Training config updated: epochs set to {settings['training']['epochs']}")
+    return config_path
+
+
+@pytest.fixture
+def cleanup_test_artifacts():
+    """
+    Snapshot the training config before the test; restore it and delete
+    generated artifacts once the test has finished.
+    """
+    training_config = "examples/config/HGNS_PF_datakit_case14.yaml"
+    backup_config = f"{training_config}.bak"
+    downloaded_config = "integrationtests/default.yaml"
+
+    if os.path.exists(training_config):
+        shutil.copy2(training_config, backup_config)
+
+    yield
+
+    # Put the snapshot back over the training config.
+    if os.path.exists(backup_config):
+        shutil.move(backup_config, training_config)
+
+    # Drop the downloaded data-generation config.
+    if os.path.exists(downloaded_config):
+        os.remove(downloaded_config)
+
+    # Drop the generated data and log directories.
+    for artifact_dir in filter(os.path.exists, ("data_out", "logs")):
+        shutil.rmtree(artifact_dir, ignore_errors=True)
+
+
+def test_train(cleanup_test_artifacts):
+    """
+    Integration test for gridfm-datakit data generation and gridfm-graphkit training.
+
+    Steps:
+    1. Generate power grid data using gridfm-datakit (skipped if data_out/ is populated)
+    2. Train a model using gridfm-graphkit
+    3. Validate the PBE Mean metric of the most recent run
+    """
+    data_dir = "data_out"
+    log_base = "logs"
+
+    if not os.path.exists(data_dir) or not os.listdir(data_dir):
+        print("Data directory not found or empty, generating data...")
+        config_path = prepare_config()
+
+        execute_and_live_output(f"gridfm_datakit generate {config_path}")
+    else:
+        print(f"Data directory '{data_dir}' already exists, skipping generation.")
+
+    training_config_path = prepare_training_config()
+
+    # Reuse data_dir/log_base so the paths checked here match what is trained on.
+    execute_and_live_output(
+        f"gridfm_graphkit train "
+        f"--config {training_config_path} "
+        f"--data_path {data_dir}/ "
+        f"--exp_name exp1 "
+        f"--run_name run1 "
+        f"--log_dir {log_base}"
+    )
+
+    # Only directories can be experiments/runs; stray files must not be picked
+    # as "latest". Both levels use mtime so the selection rule is the same.
+    exp_dirs = [p for p in glob.glob(os.path.join(log_base, "*")) if os.path.isdir(p)]
+    assert exp_dirs, f"No experiment directories found in {log_base}/"
+
+    latest_exp_dir = max(exp_dirs, key=os.path.getmtime)
+
+    run_dirs = [
+        p for p in glob.glob(os.path.join(latest_exp_dir, "*")) if os.path.isdir(p)
+    ]
+    assert run_dirs, f"No run directories found in {latest_exp_dir}"
+
+    latest_run_dir = max(run_dirs, key=os.path.getmtime)
+
+    metrics_file = os.path.join(
+        latest_run_dir,
+        "artifacts",
+        "test",
+        "case14_ieee_metrics.csv",
+    )
+
+    assert os.path.exists(metrics_file), f"Metrics file not found: {metrics_file}"
+
+    df = pd.read_csv(metrics_file)
+
+    pbe_mean_row = df[df["Metric"] == "PBE Mean"]
+    assert len(pbe_mean_row) > 0, "PBE Mean metric not found in CSV"
+
+    pbe_mean_value = float(pbe_mean_row.iloc[0]["Value"])
+
+    assert 1.1 <= pbe_mean_value <= 2.9, (
+        f"PBE Mean value {pbe_mean_value} is outside acceptable range [1.1, 2.9]"
+    )
+
+    print(f"PBE Mean value {pbe_mean_value} is within acceptable range [1.1, 2.9]")
\ No newline at end of file
diff --git a/mkdocs.yml b/mkdocs.yml
index afc3359..6581214 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -19,7 +19,12 @@ nav:
       - Data Modules: datasets/data_modules.md
       - Transforms: datasets/transforms.md
     - Tasks:
-      - Feature Reconstruction: tasks/feature_reconstruction.md
+      - Overview: tasks/feature_reconstruction.md
+      - Base Task: tasks/base_task.md
+      - Reconstruction Task: tasks/reconstruction_task.md
+      - Power Flow Task: tasks/power_flow.md
+      - Optimal Power Flow Task: tasks/optimal_power_flow.md
+      - State Estimation Task: tasks/state_estimation.md
     - Models: models/models.md
     - Training:
       - Losses: training/loss.md