diff --git a/opacus/data_loader.py b/opacus/data_loader.py index 6e713f4d..21302baa 100644 --- a/opacus/data_loader.py +++ b/opacus/data_loader.py @@ -1,3 +1,424 @@ +# # Copyright (c) Meta Platforms, Inc. and affiliates. +# # +# # Licensed under the Apache License, Version 2.0 (the "License"); +# # you may not use this file except in compliance with the License. +# # You may obtain a copy of the License at +# # +# # http://www.apache.org/licenses/LICENSE-2.0 +# # +# # Unless required by applicable law or agreed to in writing, software +# # distributed under the License is distributed on an "AS IS" BASIS, +# # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# # See the License for the specific language governing permissions and +# # limitations under the License. +# import copy +# import logging +# from typing import Any, List, Mapping, Optional, Sequence, Tuple, Type, Union + +# import torch +# from opacus.utils.uniform_sampler import ( +# DistributedUniformWithReplacementSampler, +# UniformWithReplacementSampler, +# ) +# from torch.utils.data import BatchSampler, DataLoader, Dataset, IterableDataset, Sampler +# from torch.utils.data._utils.collate import default_collate +# from torch.utils.data.dataloader import _collate_fn_t + + +# logger = logging.getLogger(__name__) + + +# class CollateFnWithEmpty: +# """ +# Collate function wrapper that handles empty batches by preserving batch structure. + +# This wrapper is stateful and learns the expected batch structure from the first +# non-empty batch it processes. When an empty batch is encountered, it generates +# an empty batch with the same structure (tensors, dicts, lists, or nested combinations) +# but with zero-length batch dimensions. + +# This is particularly useful for Poisson sampling in differential privacy, where +# batch sizes can vary and occasionally result in empty batches. + +# Args: +# collator_fn: The original collate function to wrap. If None, returns batch as-is. 
+# batch_first: If True, batch dimension is the first dimension (index 0). +# If False, batch dimension is the second dimension (index 1). +# Default: True +# rand_on_empty: If True, returns tensors filled with random values (0 or 1) +# with batch dimension set to 1 when encountering empty batches. +# If False, returns tensors with batch dimension set to 0. +# Default: False + +# Example: +# >>> collate_fn = CollateFnWithEmpty(default_collate) +# >>> # First batch: [{"x": tensor([1, 2]), "y": tensor([3, 4])}] +# >>> # Empty batch: [] -> {"x": tensor([]), "y": tensor([])} + +# Note: +# The first batch processed must be non-empty, as it defines the structure +# for all subsequent empty batches. + +# Only torch.Tensor, dict (Mapping), list, and tuple types are supported. +# If your collate function returns other types, a TypeError will be raised +# to preserve DP guarantees (returning non-empty data for empty batches +# would violate the privacy guarantee). +# """ + +# def __init__( +# self, +# collator_fn: Optional[_collate_fn_t], +# batch_first: bool = True, +# rand_on_empty: bool = False, +# sample_empty_shapes: Optional[Sequence[Tuple]] = None, +# dtypes: Optional[Sequence[Union[torch.dtype, Type]]] = None, +# ) -> None: +# self.wrapped_collator_fn = collator_fn +# self.batch_first = batch_first +# self.rand_on_empty = rand_on_empty +# self.sample_empty_shapes = sample_empty_shapes +# self.dtypes = dtypes +# self.first_batch = None + +# def __call__(self, batch: List[Any]) -> Union[torch.Tensor, List, Mapping]: +# if len(batch) > 0: +# if not self.wrapped_collator_fn: +# output = batch +# else: +# output = self.wrapped_collator_fn(batch) +# if self.first_batch is None: +# self.first_batch = copy.deepcopy(output) +# else: +# if self.first_batch is None: +# if self.sample_empty_shapes is not None and self.dtypes is not None: +# logger.warning( +# "First batch is empty. We are using a list of zero-valued " +# "tensors as a batch. 
This may cause issues if the model " +# "expects a different batch format. To fix, use more data, " +# "increase epsilon, or increase sampling rate." +# ) +# return [ +# torch.zeros(shape, dtype=dtype) +# for shape, dtype in zip(self.sample_empty_shapes, self.dtypes) +# ] +# else: +# logger.warning( +# "First batch is empty. We are using an empty list as a " +# "batch. This may cause issues if the model expects a " +# "different batch format. To fix, use more data, increase " +# "epsilon, or increase sampling rate." +# ) +# return [] + +# # materialize into empty with the same structure as list/dict +# output = self._make_empty_batch(self.first_batch) + +# return output + +# def _make_empty_batch( +# self, sample: Union[torch.Tensor, Mapping, List, Any] +# ) -> Union[torch.Tensor, Mapping, List, Any]: +# if torch.is_tensor(sample): +# shape = list(sample.shape) +# # If it's at least 1D, set batch dim to 1; otherwise make a 0-length 1D tensor +# batch_dim = 0 if self.batch_first else 1 +# shape[batch_dim] = 1 if self.rand_on_empty else 0 +# if self.rand_on_empty: +# return torch.randint( +# 0, 2, shape, dtype=sample.dtype, device=sample.device +# ) +# else: +# return torch.empty(shape, dtype=sample.dtype, device=sample.device) + +# if isinstance(sample, Mapping): +# return {k: self._make_empty_batch(v) for k, v in sample.items()} + +# if isinstance(sample, (list, tuple)): +# converted = [self._make_empty_batch(v) for v in sample] +# return type(sample)(converted) + +# # Unsupported type - raise error to preserve DP guarantees +# raise TypeError( +# f"Unsupported batch type: {type(sample).__name__}. " +# f"CollateFnWithEmpty only supports batches containing torch.Tensor, " +# f"dict (Mapping), list, or tuple types. " +# f"If you need support for a different output type, please open an issue at " +# f"Opacus or submit a PR." 
+# ) + + +# def wrap_collate_with_empty( +# *, +# collate_fn: Optional[_collate_fn_t], +# batch_first: bool = True, +# rand_on_empty: bool = False, +# sample_empty_shapes: Optional[Sequence[Tuple]] = None, +# dtypes: Optional[Sequence[Union[torch.dtype, Type]]] = None, +# ) -> CollateFnWithEmpty: +# """ +# Wraps given collate function to handle empty batches. + +# This function returns a stateful ``CollateFnWithEmpty`` instance that learns +# the batch structure from the first non-empty batch and uses this structure +# to generate properly shaped empty batches when needed. + +# Args: +# collate_fn: collate function to wrap. If None, returns batches as-is. +# batch_first: Flag to indicate if the input tensor to the corresponding module +# has the first dimension representing the batch. If set to True, dimensions on +# input tensor are expected to be ``[batch_size, ...]``, otherwise +# ``[K, batch_size, ...]`` +# rand_on_empty: set ``True`` to return a batch containing random numbers when encountering +# empty batches rather than tensors with zero-length batch dimensions + +# Returns: +# CollateFnWithEmpty: A callable that is equivalent to input ``collate_fn`` for non-empty +# batches and outputs empty tensors with the same structure when the input batch is empty. +# The structure is learned from the first non-empty batch. 
+ +# Example: +# >>> from torch.utils.data._utils.collate import default_collate +# >>> collate = wrap_collate_with_empty(collate_fn=default_collate) +# >>> # First batch defines structure +# >>> result = collate([{"x": torch.tensor([1, 2])}]) +# >>> # Empty batch uses learned structure +# >>> empty = collate([]) # Returns {"x": torch.tensor([])} +# """ + +# return CollateFnWithEmpty( +# collate_fn, +# batch_first=batch_first, +# rand_on_empty=rand_on_empty, +# sample_empty_shapes=sample_empty_shapes, +# dtypes=dtypes, +# ) + + +# def shape_safe(x: Any) -> Tuple: +# """Exception-safe getter for ``shape`` attribute.""" +# return getattr(x, "shape", ()) + + +# def dtype_safe(x: Any) -> Union[torch.dtype, Type]: +# """Exception-safe getter for ``dtype`` attribute.""" +# return getattr(x, "dtype", type(x)) + + +# class DPDataLoader(DataLoader): +# """ +# DataLoader subclass that always does Poisson sampling and supports empty batches +# by default. + +# Typically instantiated via ``DPDataLoader.from_data_loader()`` method based +# on another DataLoader. DPDataLoader would preserve the behaviour of the original +# data loader, except for two aspects. + +# First, it switches ``batch_sampler`` to ``UniformWithReplacementSampler``, thus enabling +# Poisson sampling (i.e. each element in the dataset is selected to be in the +# next batch with a certain probability defined by ``sample_rate`` parameter). +# NB: this typically leads to batches of variable size. +# NB2: By default, ``sample_rate`` is calculated based on the ``batch_size`` of the +# original data loader, so that the average batch size stays the same + +# Second, it wraps collate function with support for empty batches. +# Most PyTorch modules will happily process tensors of shape ``(0, N, ...)``, +# but many collate functions will fail to produce such a batch. As with the +# Poisson sampling empty batches become a possibility, we need a DataLoader that +# can handle them. 
+# """ + +# def __init__( +# self, +# dataset: Dataset, +# *, +# sample_rate: float, +# collate_fn: Optional[_collate_fn_t] = None, +# drop_last: bool = False, +# generator=None, +# distributed: bool = False, +# batch_first: bool = True, +# rand_on_empty: bool = False, +# **kwargs, +# ): +# """ + +# Args: +# dataset: See :class:`torch.utils.data.DataLoader` +# sample_rate: probability with which each element of the dataset is included +# in the next batch. +# num_workers: See :class:`torch.utils.data.DataLoader` +# collate_fn: See :class:`torch.utils.data.DataLoader` +# pin_memory: See :class:`torch.utils.data.DataLoader` +# drop_last: See :class:`torch.utils.data.DataLoader` +# timeout: See :class:`torch.utils.data.DataLoader` +# worker_init_fn: See :class:`torch.utils.data.DataLoader` +# multiprocessing_context: See :class:`torch.utils.data.DataLoader` +# generator: Random number generator used to sample elements +# prefetch_factor: See :class:`torch.utils.data.DataLoader` +# persistent_workers: See :class:`torch.utils.data.DataLoader` +# distributed: set ``True`` if you'll be using DPDataLoader in a DDP environment +# Selects between ``DistributedUniformWithReplacementSampler`` and +# ``UniformWithReplacementSampler`` sampler implementations +# rand_on_empty: set ``True`` to return a batch containing random numbers when encountering +# empty batches rather than tensors with zero-length batch dimensions +# """ + +# self.sample_rate = sample_rate +# self.distributed = distributed + +# if distributed: +# batch_sampler = DistributedUniformWithReplacementSampler( +# total_size=len(dataset), # type: ignore[assignment, arg-type] +# sample_rate=sample_rate, +# generator=generator, +# ) +# else: +# batch_sampler = UniformWithReplacementSampler( +# num_samples=len(dataset), # type: ignore[assignment, arg-type] +# sample_rate=sample_rate, +# generator=generator, +# ) +# sample_empty_shapes = [(0, *shape_safe(x)) for x in dataset[0]] +# dtypes = [dtype_safe(x) for x in 
dataset[0]] + +# if collate_fn is None: +# collate_fn = default_collate + +# if drop_last: +# logger.warning( +# "Ignoring drop_last as it is not compatible with DPDataLoader." +# ) + +# super().__init__( +# dataset=dataset, +# batch_sampler=batch_sampler, +# collate_fn=wrap_collate_with_empty( +# collate_fn=collate_fn, +# batch_first=batch_first, +# rand_on_empty=rand_on_empty, +# sample_empty_shapes=sample_empty_shapes, +# dtypes=dtypes, +# ), +# generator=generator, +# **kwargs, +# ) + +# @classmethod +# def from_data_loader( +# cls, +# data_loader: DataLoader, +# *, +# distributed: bool = False, +# generator=None, +# batch_first: bool = True, +# rand_on_empty: bool = False, +# ): +# """ +# Creates new ``DPDataLoader`` based on passed ``data_loader`` argument. + +# Args: +# data_loader: Any DataLoader instance. Must not be over an ``IterableDataset`` +# distributed: set ``True`` if you'll be using DPDataLoader in a DDP environment +# generator: Random number generator used to sample elements. Defaults to +# generator from the original data loader. +# batch_first: Flag to indicate if the input tensor to the corresponding module +# has the first dimension representing the batch. If set to True, dimensions on +# input tensor are expected to be ``[batch_size, ...]``, otherwise +# ``[K, batch_size, ...]`` +# rand_on_empty: set ``True`` to return a batch containing random numbers when encountering +# empty batches rather than tensors with zero-length batch dimensions + + + +# Returns: +# New DPDataLoader instance, with all attributes and parameters inherited +# from the original data loader, except for sampling mechanism. 
+ +# Examples: +# >>> x, y = torch.randn(64, 5), torch.randint(0, 2, (64,)) +# >>> dataset = TensorDataset(x,y) +# >>> data_loader = DataLoader(dataset, batch_size=4) +# >>> dp_data_loader = DPDataLoader.from_data_loader(data_loader) +# """ + +# if isinstance(data_loader.dataset, IterableDataset): +# raise ValueError("Uniform sampling is not supported for IterableDataset") + +# return cls( +# dataset=data_loader.dataset, +# sample_rate=1 / len(data_loader), +# num_workers=data_loader.num_workers, +# collate_fn=data_loader.collate_fn, +# pin_memory=data_loader.pin_memory, +# drop_last=data_loader.drop_last, +# timeout=data_loader.timeout, +# worker_init_fn=data_loader.worker_init_fn, +# multiprocessing_context=data_loader.multiprocessing_context, +# generator=generator if generator else data_loader.generator, +# prefetch_factor=data_loader.prefetch_factor, +# persistent_workers=data_loader.persistent_workers, +# distributed=distributed, +# batch_first=batch_first, +# rand_on_empty=rand_on_empty, +# ) + + +# def _is_supported_batch_sampler(sampler: Sampler): +# return ( +# isinstance(sampler, BatchSampler) +# or isinstance(sampler, UniformWithReplacementSampler) +# or isinstance(sampler, DistributedUniformWithReplacementSampler) +# ) + + +# def switch_generator(*, data_loader: DataLoader, generator): +# """ +# Creates new instance of a ``DataLoader``, with the exact same behaviour of the +# provided data loader, except for the source of randomness. + +# Typically used to enhance a user-provided data loader object with cryptographically +# secure random number generator + +# Args: +# data_loader: Any ``DataLoader`` object +# generator: Random number generator object + +# Returns: +# New ``DataLoader`` object with the exact same behaviour as the input data loader, +# except for the source of randomness. 
+# """ +# batch_sampler = data_loader.batch_sampler + +# if batch_sampler is None or not _is_supported_batch_sampler(batch_sampler): +# raise ValueError( +# "Non-batch processing is not supported: Opacus always assumes one of the input dimensions to be batch dimension." +# ) + +# if isinstance(batch_sampler, BatchSampler): +# if not hasattr(batch_sampler.sampler, "generator"): +# raise ValueError( +# "Target sampler doesn't have generator attribute: nothing to switch" +# ) + +# batch_sampler.sampler.generator = generator +# else: +# batch_sampler.generator = generator + +# return DataLoader( +# dataset=data_loader.dataset, +# batch_sampler=batch_sampler, +# num_workers=data_loader.num_workers, +# collate_fn=data_loader.collate_fn, +# pin_memory=data_loader.pin_memory, +# drop_last=data_loader.drop_last, +# timeout=data_loader.timeout, +# worker_init_fn=data_loader.worker_init_fn, +# multiprocessing_context=data_loader.multiprocessing_context, +# generator=generator, +# prefetch_factor=data_loader.prefetch_factor, +# persistent_workers=data_loader.persistent_workers, +# ) + # Copyright (c) Meta Platforms, Inc. and affiliates. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -31,15 +452,12 @@ class CollateFnWithEmpty: """ Collate function wrapper that handles empty batches by preserving batch structure. - This wrapper is stateful and learns the expected batch structure from the first non-empty batch it processes. When an empty batch is encountered, it generates an empty batch with the same structure (tensors, dicts, lists, or nested combinations) but with zero-length batch dimensions. - This is particularly useful for Poisson sampling in differential privacy, where batch sizes can vary and occasionally result in empty batches. - Args: collator_fn: The original collate function to wrap. If None, returns batch as-is. batch_first: If True, batch dimension is the first dimension (index 0). 
@@ -49,16 +467,13 @@ class CollateFnWithEmpty: with batch dimension set to 1 when encountering empty batches. If False, returns tensors with batch dimension set to 0. Default: False - Example: >>> collate_fn = CollateFnWithEmpty(default_collate) >>> # First batch: [{"x": tensor([1, 2]), "y": tensor([3, 4])}] >>> # Empty batch: [] -> {"x": tensor([]), "y": tensor([])} - Note: The first batch processed must be non-empty, as it defines the structure for all subsequent empty batches. - Only torch.Tensor, dict (Mapping), list, and tuple types are supported. If your collate function returns other types, a TypeError will be raised to preserve DP guarantees (returning non-empty data for empty batches @@ -157,11 +572,9 @@ def wrap_collate_with_empty( ) -> CollateFnWithEmpty: """ Wraps given collate function to handle empty batches. - This function returns a stateful ``CollateFnWithEmpty`` instance that learns the batch structure from the first non-empty batch and uses this structure to generate properly shaped empty batches when needed. - Args: collate_fn: collate function to wrap. If None, returns batches as-is. batch_first: Flag to indicate if the input tensor to the corresponding module @@ -170,12 +583,10 @@ def wrap_collate_with_empty( ``[K, batch_size, ...]`` rand_on_empty: set ``True`` to return a batch containing random numbers when encountering empty batches rather than tensors with zero-length batch dimensions - Returns: CollateFnWithEmpty: A callable that is equivalent to input ``collate_fn`` for non-empty batches and outputs empty tensors with the same structure when the input batch is empty. The structure is learned from the first non-empty batch. - Example: >>> from torch.utils.data._utils.collate import default_collate >>> collate = wrap_collate_with_empty(collate_fn=default_collate) @@ -208,18 +619,15 @@ class DPDataLoader(DataLoader): """ DataLoader subclass that always does Poisson sampling and supports empty batches by default. 
- Typically instantiated via ``DPDataLoader.from_data_loader()`` method based on another DataLoader. DPDataLoader would preserve the behaviour of the original data loader, except for the two aspects. - First, it switches ``batch_sampler`` to ``UniformWithReplacementSampler``, thus enabling Poisson sampling (i.e. each element in the dataset is selected to be in the next batch with a certain probability defined by ``sample_rate`` parameter). NB: this typically leads to a batches of variable size. NB2: By default, ``sample_rate`` is calculated based on the ``batch_size`` of the original data loader, so that the average batch size stays the same - Second, it wraps collate function with support for empty batches. Most PyTorch modules will happily process tensors of shape ``(0, N, ...)``, but many collate functions will fail to produce such a batch. As with the @@ -241,7 +649,6 @@ def __init__( **kwargs, ): """ - Args: dataset: See :class:`torch.utils.data.DataLoader` sample_rate: probability with which each element of the dataset is included @@ -315,7 +722,6 @@ def from_data_loader( ): """ Creates new ``DPDataLoader`` based on passed ``data_loader`` argument. - Args: data_loader: Any DataLoader instance. Must not be over an ``IterableDataset`` distributed: set ``True`` if you'll be using DPDataLoader in a DDP environment @@ -327,13 +733,9 @@ def from_data_loader( ``[K, batch_size, ...]`` rand_on_empty: set ``True`` to return a batch containing random numbers when encountering empty batches rather than tensors with zero-length batch dimensions - - - Returns: New DPDataLoader instance, with all attributes and parameters inherited from the original data loader, except for sampling mechanism. 
- Examples: >>> x, y = torch.randn(64, 5), torch.randint(0, 2, (64,)) >>> dataset = TensorDataset(x,y) @@ -346,7 +748,7 @@ def from_data_loader( return cls( dataset=data_loader.dataset, - sample_rate=1 / len(data_loader), + sample_rate=(data_loader.batch_size if data_loader.batch_size is not None else data_loader.batch_sampler.batch_size) / len(data_loader.dataset), num_workers=data_loader.num_workers, collate_fn=data_loader.collate_fn, pin_memory=data_loader.pin_memory, @@ -375,14 +777,11 @@ def switch_generator(*, data_loader: DataLoader, generator): """ Creates new instance of a ``DataLoader``, with the exact same behaviour of the provided data loader, except for the source of randomness. - Typically used to enhance a user-provided data loader object with cryptographically secure random number generator - Args: data_loader: Any ``DataLoader`` object generator: Random number generator object - Returns: New ``DataLoader`` object with the exact same behaviour as the input data loader, except for the source of randomness. diff --git a/opacus/privacy_engine.py b/opacus/privacy_engine.py index 5a6a3307..192f3b62 100644 --- a/opacus/privacy_engine.py +++ b/opacus/privacy_engine.py @@ -1,4 +1,836 @@ -#!/usr/bin/env python3 +# #!/usr/bin/env python3 +# # Copyright (c) Meta Platforms, Inc. and affiliates. +# # +# # Licensed under the Apache License, Version 2.0 (the "License"); +# # you may not use this file except in compliance with the License. +# # You may obtain a copy of the License at +# # +# # http://www.apache.org/licenses/LICENSE-2.0 +# # +# # Unless required by applicable law or agreed to in writing, software +# # distributed under the License is distributed on an "AS IS" BASIS, +# # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# # See the License for the specific language governing permissions and +# # limitations under the License +# import warnings +# from itertools import chain +# from typing import IO, Any, BinaryIO, Dict, List, Optional, Tuple, Union + +# import sys +# import os +# project_root = os.path.dirname(os.path.abspath(__file__)) + +# # Insert it at the beginning of the path list +# sys.path.insert(0, project_root) + + +# import torch +# from opacus.accountants import create_accountant +# from opacus.accountants.utils import get_noise_multiplier +# from opacus.data_loader import DPDataLoader, switch_generator +# from opacus.distributed import DifferentiallyPrivateDistributedDataParallel as DPDDP +# from opacus.grad_sample import ( +# AbstractGradSampleModule, +# GradSampleHooks, +# GradSampleModule, +# get_gsm_class, +# prepare_module, +# ) +# from opacus.optimizers import DPOptimizer, get_optimizer_class +# from opacus.schedulers import _GradClipScheduler, _NoiseScheduler +# from opacus.utils.fast_gradient_clipping_utils import DPLossFastGradientClipping +# from opacus.validators.module_validator import ModuleValidator +# from torch import nn, optim +# from torch.distributed._composable.fsdp import FSDPModule +# from torch.nn.parallel import DistributedDataParallel as DDP +# from torch.utils.data import DataLoader + + +# class PrivacyEngine: +# """ +# Main entry point to the Opacus API - use ``PrivacyEngine`` to enable differential +# privacy for your model training. + +# ``PrivacyEngine`` object encapsulates current privacy state (privacy budget + +# method it's been calculated) and exposes ``make_private`` method to wrap your +# PyTorch training objects with their private counterparts. + +# Example: +# >>> dataloader = demo_dataloader +# >>> model = MyCustomModel() +# >>> optimizer = torch.optim.SGD(model.parameters(), lr=0.05) +# >>> privacy_engine = PrivacyEngine() +# >>> +# >>> model, optimizer, dataloader = privacy_engine.make_private( +# ... module=model, +# ... 
optimizer=optimizer, +# ... data_loader=dataloader, +# ... noise_multiplier=1.0, +# ... max_grad_norm=1.0, +# ... ) +# >>> # continue training as normal +# """ + +# def __init__(self, *, accountant: str = "prv", secure_mode: bool = False): +# """ + +# Args: +# accountant: Accounting mechanism. Currently supported: +# - rdp (:class:`~opacus.accountants.RDPAccountant`) +# - gdp (:class:`~opacus.accountants.GaussianAccountant`) +# - prv (:class:`~opacus.accountants.PRVAccountant`) +# secure_mode: Set to ``True`` if cryptographically strong DP guarantee is +# required. ``secure_mode=True`` uses secure random number generator for +# noise and shuffling (as opposed to pseudo-rng in vanilla PyTorch) and +# prevents certain floating-point arithmetic-based attacks. +# See :meth:`~opacus.optimizers.optimizer._generate_noise` for details. +# When set to ``True`` requires ``torchcsprng`` to be installed +# """ +# self.accountant = create_accountant(mechanism=accountant) +# self.secure_mode = secure_mode +# self.secure_rng = None +# self.dataset = None # only used to detect switching to a different dataset +# if self.secure_mode: +# try: +# import torchcsprng as csprng +# except ImportError as e: +# msg = ( +# "To use secure RNG, you must install the torchcsprng package! " +# "Check out the instructions here: https://github.com/pytorch/csprng#installation" +# ) +# raise ImportError(msg) from e + +# self.secure_rng = csprng.create_random_device_generator("/dev/urandom") +# else: +# warnings.warn( +# "Secure RNG turned off. This is perfectly fine for experimentation as it allows " +# "for much faster training performance, but remember to turn it on and retrain " +# "one last time before production with ``secure_mode`` turned on." 
+# ) + +# def _prepare_optimizer( +# self, +# *, +# optimizer: optim.Optimizer, +# noise_multiplier: float, +# max_grad_norm: Union[float, List[float]], +# expected_batch_size: int, +# loss_reduction: str = "mean", +# distributed: bool = False, +# clipping: str = "flat", +# noise_generator=None, +# grad_sample_mode="hooks", +# **kwargs, +# ) -> DPOptimizer: +# if isinstance(optimizer, DPOptimizer): +# optimizer = optimizer.original_optimizer + +# generator = None +# if self.secure_mode: +# generator = self.secure_rng +# elif noise_generator is not None: +# generator = noise_generator + +# optim_class = get_optimizer_class( +# clipping=clipping, +# distributed=distributed, +# grad_sample_mode=grad_sample_mode, +# ) + +# return optim_class( +# optimizer=optimizer, +# noise_multiplier=noise_multiplier, +# max_grad_norm=max_grad_norm, +# expected_batch_size=expected_batch_size, +# loss_reduction=loss_reduction, +# generator=generator, +# secure_mode=self.secure_mode, +# **kwargs, +# ) + +# def _prepare_data_loader( +# self, +# data_loader: DataLoader, +# *, +# poisson_sampling: bool, +# distributed: bool, +# batch_first: bool = True, +# rand_on_empty: bool = False, +# ) -> DataLoader: +# if self.dataset is None: +# self.dataset = data_loader.dataset +# elif self.dataset != data_loader.dataset: +# warnings.warn( +# f"PrivacyEngine detected new dataset object. " +# f"Was: {self.dataset}, got: {data_loader.dataset}. " +# f"Privacy accounting works per dataset, please initialize " +# f"new PrivacyEngine if you're using different dataset. 
" +# f"You can ignore this warning if two datasets above " +# f"represent the same logical dataset" +# ) + +# if poisson_sampling: +# return DPDataLoader.from_data_loader( +# data_loader, +# generator=self.secure_rng, +# distributed=distributed, +# batch_first=batch_first, +# rand_on_empty=rand_on_empty, +# ) +# elif self.secure_mode: +# return switch_generator(data_loader=data_loader, generator=self.secure_rng) +# else: +# return data_loader + +# def _prepare_model( +# self, +# module: nn.Module, +# *, +# batch_first: bool = True, +# max_grad_norm: Union[float, List[float]] = 1.0, +# loss_reduction: str = "mean", +# grad_sample_mode: str = "hooks", +# wrap_model: bool = True, +# ) -> Union[AbstractGradSampleModule, GradSampleHooks]: +# # Ideally, validation should have been taken care of by calling +# # `get_compatible_module()` +# self.validate(module=module, optimizer=None, data_loader=None) + +# # wrap +# if wrap_model and isinstance(module, AbstractGradSampleModule): +# if ( +# module.batch_first != batch_first +# or module.loss_reduction != loss_reduction +# or type(module) is not get_gsm_class(grad_sample_mode) +# ): +# raise ValueError( +# f"Pre-existing GradSampleModule doesn't match new arguments." 
+# f"Got: module.batch_first: {module.batch_first}, module.loss_reduction: {module.loss_reduction}, type(module): {type(module)}" +# f"Requested: batch_first:{batch_first}, loss_reduction: {loss_reduction}, grad_sample_mode: {grad_sample_mode} " +# f"Please pass vanilla nn.Module instead" +# ) + +# return module +# else: +# if grad_sample_mode in ["ghost", "ghost_fsdp"]: +# return prepare_module( +# module, +# grad_sample_mode=grad_sample_mode, +# batch_first=batch_first, +# loss_reduction=loss_reduction, +# max_grad_norm=max_grad_norm, +# wrap_model=wrap_model, +# ) +# else: +# return prepare_module( +# module, +# grad_sample_mode=grad_sample_mode, +# batch_first=batch_first, +# loss_reduction=loss_reduction, +# wrap_model=wrap_model, +# ) + +# def _prepare_criterion( +# self, +# *, +# module: GradSampleModule, +# optimizer: DPOptimizer, +# criterion, +# loss_reduction: str = "mean", +# **kwargs, +# ) -> DPLossFastGradientClipping: +# """ +# Args: +# module: GradSampleModule used for training, +# optimizer: DPOptimizer used for training, +# criterion: Loss function used for training, +# loss_reduction: "mean" or "sum", indicates if the loss reduction (for aggregating the gradients) + +# Prepare the DP loss class, which packages the two backward passes for fast gradient clipping. +# """ +# return DPLossFastGradientClipping(module, optimizer, criterion, loss_reduction) + +# def is_compatible( +# self, +# *, +# module: nn.Module, +# optimizer: Optional[optim.Optimizer], +# data_loader: Optional[DataLoader], +# ) -> bool: +# """ +# Check if task components are compatible with DP. 
+ +# Args: +# module: module to be checked +# optimizer: optimizer to be checked +# data_loader: data_loader to be checked + +# Returns: +# ``True`` if compatible, ``False`` otherwise +# """ +# return ModuleValidator.is_valid(module) + +# def validate( +# self, +# *, +# module: nn.Module, +# optimizer: Optional[optim.Optimizer], +# data_loader: Optional[DataLoader], +# ): +# """ +# Validate that task components are compatible with DP. +# Same as ``is_compatible()``, but raises error instead of returning bool. + +# Args: +# module: module to be checked +# optimizer: optimizer to be checked +# data_loader: data_loader to be checked + +# Raises: +# UnsupportedModuleError +# If one or more modules found to be incompatible +# """ +# ModuleValidator.validate(module, strict=True) + +# @classmethod +# def get_compatible_module(cls, module: nn.Module) -> nn.Module: +# """ +# Return a privacy engine compatible module. Also validates the module after +# running registered fixes. + +# Args: +# module: module to be modified + +# Returns: +# Module with some submodules replaced for their deep copies or +# close equivalents. 
+# See :class:`~opacus.validators.module_validator.ModuleValidator` for +# more details +# """ +# module = ModuleValidator.fix(module) +# ModuleValidator.validate(module, strict=True) +# return module + +# def make_private( +# self, +# *, +# # metadata_epsilon: Optional[float] = None, # New Parameter +# module: nn.Module, +# optimizer: optim.Optimizer, +# criterion=nn.CrossEntropyLoss(), # Added default for backward compatibility +# data_loader: DataLoader, +# noise_multiplier: float, +# max_grad_norm: Union[float, List[float]], +# batch_first: bool = True, +# loss_reduction: str = "mean", +# poisson_sampling: bool = True, +# clipping: str = "flat", +# noise_generator=None, +# grad_sample_mode: str = "hooks", +# wrap_model: bool = True, +# rand_on_empty: bool = False, +# metadata_epsilon: Optional[float] = None, +# **kwargs, +# ) -> Union[ +# Tuple[ +# Union[AbstractGradSampleModule, GradSampleHooks], DPOptimizer, DataLoader +# ], +# Tuple[ +# Union[AbstractGradSampleModule, GradSampleHooks], +# DPOptimizer, +# DPLossFastGradientClipping, +# DataLoader, +# ], +# ]: +# """ +# Add privacy-related responsibilities to the main PyTorch training objects: +# model, optimizer, and the data loader. + +# All of the returned objects act just like their non-private counterparts +# passed as arguments, but with added DP tasks. + +# - Model is wrapped to also compute per sample gradients. +# - Optimizer is now responsible for gradient clipping and adding noise to the gradients. +# - Criterion is a wrapper around the original criterion that packages the two backward passes for fast gradient clipping. +# - DataLoader is updated to perform Poisson sampling. + +# Notes: +# Using any other models, optimizers, or data sources during training +# will invalidate stated privacy guarantees. 
+ +# Args: +# module: PyTorch module to be used for training +# optimizer: Optimizer to be used for training +# data_loader: DataLoader to be used for training +# noise_multiplier: The ratio of the standard deviation of the Gaussian noise to +# the L2-sensitivity of the function to which the noise is added +# (How much noise to add) +# max_grad_norm: The maximum norm of the per-sample gradients. Any gradient with norm +# higher than this will be clipped to this value. +# batch_first: Flag to indicate if the input tensor to the corresponding module +# has the first dimension representing the batch. If set to True, dimensions on +# input tensor are expected be ``[batch_size, ...]``, otherwise +# ``[K, batch_size, ...]`` +# loss_reduction: Indicates if the loss reduction (for aggregating the gradients) +# is a sum or a mean operation. Can take values "sum" or "mean" +# poisson_sampling: ``True`` if you want to use standard sampling required +# for DP guarantees. Setting ``False`` will leave provided data_loader +# unchanged. Technically this doesn't fit the assumptions made by +# privacy accounting mechanism, but it can be a good approximation when +# using Poisson sampling is unfeasible. +# clipping: Per sample gradient clipping mechanism ("flat" or "per_layer" or "adaptive"). +# Flat clipping calculates the norm of the entire gradient over +# all parameters, per layer clipping sets individual norms for +# every parameter tensor, and adaptive clipping updates clipping bound per iteration. +# Flat clipping is usually preferred, but using per layer clipping in combination +# with distributed training can provide notable performance gains. +# noise_generator: torch.Generator() object used as a source of randomness for +# the noise +# grad_sample_mode: mode for computing per sample gradients. Determines the +# implementation class for the wrapped ``module``. 
See +# :class:`~opacus.grad_sample.gsm_base.AbstractGradSampleModule` for more +# details +# rand_on_empty: Indicates to return a batch containing random numbers when encountering +# empty batches samples with Poisson sampling rather than tensors with zero-length batch dimensions + +# Returns: +# Tuple of (hooks_or_module, optimizer, data_loader) or (hooks_or_module, optimizer, criterion, data_loader). + +# Returns a hooks object for gradient sampling and cleanup: +# - If wrap_model=True: Returns GradSampleModule wrapper (use as your model) +# - If wrap_model=False: Returns GradSampleHooks object (use your original model directly, +# use returned hooks only for cleanup) + +# The hooks object provides .cleanup() method. In non-wrapping mode, the original model +# passed to make_private() is unchanged - continue using it normally. + +# Optimizer is a wrapper around the original optimizer that also does +# gradient clipping and noise addition to the gradients +# Criterion is a wrapper around the original criterion that packages the two backward passes for fast gradient clipping. +# Only returned when grad_sample_mode is "ghost". +# DataLoader is a brand new DataLoader object, constructed to behave as +# equivalent to the original data loader, possibly with updated +# sampling mechanism. Points to the same dataset object. 
+# """ +# if noise_generator and self.secure_mode: +# raise ValueError("Passing seed is prohibited in secure mode") + +# # compare module parameter with optimizer parameters +# model_parameters = set(module.parameters()) +# for p in chain.from_iterable( +# [param_group["params"] for param_group in optimizer.param_groups] +# ): +# if p not in model_parameters: +# raise ValueError( +# "Module parameters are different than optimizer Parameters" +# ) + +# distributed = isinstance(module, (DPDDP, DDP, FSDPModule)) + +# module = self._prepare_model( +# module, +# batch_first=batch_first, +# max_grad_norm=max_grad_norm, +# loss_reduction=loss_reduction, +# grad_sample_mode=grad_sample_mode, +# wrap_model=wrap_model, +# ) +# if poisson_sampling: +# module.forbid_grad_accumulation() + +# data_loader = self._prepare_data_loader( +# data_loader, +# distributed=distributed, +# poisson_sampling=poisson_sampling, +# batch_first=batch_first, +# rand_on_empty=rand_on_empty, +# ) + +# # true_n = len(data_loader.dataset) + +# # 1. Capture the true dataset size +# true_n = len(data_loader.dataset) + +# # 2. Apply Laplace Noise if epsilon is provided +# if metadata_epsilon is not None: +# import numpy as np +# # Standard Laplace mechanism: scale = 1/epsilon +# noise = np.random.laplace(0, 1.0 / float(metadata_epsilon)) +# effective_n = max(1, true_n + noise) +# else: +# effective_n = true_n + +# # 3. Calculate sample_rate and expected_batch_size +# # Using the effective_n ensures that the metadata itself is private + + +# # sample_rate = data_loader.batch_size / true_n + +# # Safely get batch size (handles NoneType) +# # batch_size = getattr(data_loader, "batch_size", None) or data_loader.batch_sampler.batch_size + +# # 1. Try to find batch_size through standard methods +# batch_size = getattr(data_loader, "batch_size", None) + +# # 2. 
If it's a BatchSampler (like standard PyTorch) +# if batch_size is None and hasattr(data_loader, "batch_sampler"): +# batch_size = getattr(data_loader.batch_sampler, "batch_size", None) + +# # 3. If it's Opacus's UniformWithReplacementSampler (where sample_rate is the truth) +# if batch_size is None and hasattr(data_loader, "generator"): +# # For these samplers, batch_size is derived from sample_rate * total_n +# # But since we have the loader, we can often find it in the sampler itself +# sampler = getattr(data_loader, "sampler", None) +# batch_size = getattr(sampler, "sample_rate", 0.0) * true_n + +# # 4. Fallback: If we still can't find it, use the length of the first batch +# if not batch_size: +# # This is a safe baseline for many custom loaders +# sample_rate = 1 / len(data_loader) +# else: +# sample_rate = batch_size / true_n + +# expected_batch_size = int(effective_n * sample_rate) + +# # sample_rate = batch_size / true_n +# # expected_batch_size = int(effective_n * sample_rate) +# # expected_batch_size = int(effective_n * sample_rate) + +# # 4. (Optional but Professional) Log the change for your report +# if metadata_epsilon is not None: +# print(f"Metadata DP Enabled: Effective N set to {effective_n:.2f}") + +# print(f"\n>>> LIVE FROM PROJECT FOLDER: True N is {true_n} <<<") +# m_eps = kwargs.get("metadata_epsilon", None) +# print(f"\nDEBUG >> All Kwargs: {kwargs.keys()}") + + +# print(f"DEBUG >> Extracted metadata_epsilon: {m_eps}") + +# if m_eps is not None: +# import numpy as np +# # We use m_eps here +# noise = np.random.laplace(0, 1.0 / float(m_eps)) +# effective_n = max(1, true_n + noise) +# else: +# effective_n = true_n + + +# metadata_epsilon = kwargs.get("metadata_epsilon", None) + + +# if metadata_epsilon is not None: +# import numpy as np +# # The sensitivity of a count is 1. +# # We add Laplace noise to hide the exact number of participants. 
+# noise = np.random.laplace(0, 1.0 / metadata_epsilon) +# effective_n = max(1, true_n + noise) + +# print(f"DEBUG >> Noise Generated: {noise:.4f}") +# print(f"DEBUG >> Effective N: {effective_n:.4f}") +# print(f"DEBUG >> Sample Rate: {1/len(data_loader):.4f}") + +# warnings.warn( +# f"Metadata Privacy enabled. Actual N: {true_n}, " +# f"Noisy N for accounting: {effective_n:.2f}" +# ) + +# else: +# effective_n = true_n + + + +# # if m_eps is not None: +# # import numpy as np +# # # FORCE a massive change to see if it works +# # noise = 500 +# # effective_n = true_n + noise +# # print(f"\n>>> DEBUG: FORCING NOISY N: {effective_n} <<<") + + + +# # sample_rate = 1 / len(data_loader) + +# from torch.utils.data import WeightedRandomSampler + +# # Detect WeightedRandomSampler BEFORE it gets replaced by Opacus +# if isinstance(data_loader.sampler, WeightedRandomSampler): +# warnings.warn( +# "WeightedRandomSampler detected. Opacus will replace it with " +# "UniformWithReplacementSampler for Poisson sampling. 
" +# "sample_rate will be recomputed as batch_size / len(dataset) " +# "to ensure correct privacy accounting.", +# UserWarning, +# ) +# batch_size = data_loader.batch_size +# sample_rate = batch_size / len(data_loader.dataset) +# else: +# sample_rate = 1 / len(data_loader) + +# expected_batch_size = int(len(data_loader.dataset) * sample_rate) + +# # expected_batch_size is the *per worker* batch size +# if distributed: +# world_size = torch.distributed.get_world_size() +# expected_batch_size /= world_size + +# optimizer = self._prepare_optimizer( +# optimizer=optimizer, +# noise_multiplier=noise_multiplier, +# max_grad_norm=max_grad_norm, +# expected_batch_size=expected_batch_size, +# loss_reduction=loss_reduction, +# noise_generator=noise_generator, +# distributed=distributed, +# clipping=clipping, +# grad_sample_mode=grad_sample_mode, +# **kwargs, +# ) + +# optimizer.attach_step_hook( +# self.accountant.get_optimizer_hook_fn(sample_rate=sample_rate) +# ) +# if "ghost" in grad_sample_mode: +# criterion = self._prepare_criterion( +# module=module, +# optimizer=optimizer, +# criterion=criterion, +# loss_reduction=loss_reduction, +# **kwargs, +# ) + +# return module, optimizer, criterion, data_loader + +# return module, optimizer, data_loader + +# def make_private_with_epsilon( +# self, +# *, +# module: nn.Module, +# optimizer: optim.Optimizer, +# criterion=nn.CrossEntropyLoss(), # Added deafult for backward compatibility +# data_loader: DataLoader, +# target_epsilon: float, +# target_delta: float, +# epochs: int, +# max_grad_norm: Union[float, List[float]], +# batch_first: bool = True, +# loss_reduction: str = "mean", +# poisson_sampling: bool = True, +# clipping: str = "flat", +# noise_generator=None, +# grad_sample_mode: str = "hooks", +# wrap_model: bool = True, +# metadata_epsilon: float = None, +# **kwargs, + +# ) -> Union[ +# Tuple[ +# Union[AbstractGradSampleModule, GradSampleHooks], DPOptimizer, DataLoader +# ], +# Tuple[ +# Union[AbstractGradSampleModule, 
GradSampleHooks], +# DPOptimizer, +# DPLossFastGradientClipping, +# DataLoader, +# ], +# ]: +# """ +# Version of :meth:`~opacus.privacy_engine.PrivacyEngine.make_private`, +# that calculates privacy parameters based on a given privacy budget. + +# For the full documentation see +# :meth:`~opacus.privacy_engine.PrivacyEngine.make_private` + +# Args: +# module: PyTorch module to be used for training +# optimizer: Optimizer to be used for training +# data_loader: DataLoader to be used for training +# target_epsilon: Target epsilon to be achieved, a metric of privacy loss at differential changes in data. +# target_delta: Target delta to be achieved. Probability of information being leaked. +# epochs: Number of training epochs you intend to perform; noise_multiplier relies on this to calculate +# an appropriate sigma to ensure privacy budget of (target_epsilon, target_delta) at the end +# of epochs. +# max_grad_norm: The maximum norm of the per-sample gradients. Any gradient with norm +# higher than this will be clipped to this value. +# batch_first: Flag to indicate if the input tensor to the corresponding module +# has the first dimension representing the batch. If set to True, dimensions on +# input tensor are expected be ``[batch_size, ...]``, otherwise +# ``[K, batch_size, ...]`` +# loss_reduction: Indicates if the loss reduction (for aggregating the gradients) +# is a sum or a mean operation. Can take values "sum" or "mean" +# poisson_sampling: ``True`` if you want to use standard sampling required +# for DP guarantees. Setting ``False`` will leave provided data_loader +# unchanged. Technically this doesn't fit the assumptions made by +# privacy accounting mechanism, but it can be a good approximation when +# using Poisson sampling is unfeasible. +# clipping: Per sample gradient clipping mechanism ("flat" or "per_layer" or "adaptive"). 
+# Flat clipping calculates the norm of the entire gradient over +# all parameters, per layer clipping sets individual norms for +# every parameter tensor, and adaptive clipping updates clipping bound per iteration. +# Flat clipping is usually preferred, but using per layer clipping in combination +# with distributed training can provide notable performance gains. +# noise_generator: torch.Generator() object used as a source of randomness for +# the noise +# grad_sample_mode: mode for computing per sample gradients. Determines the +# implementation class for the wrapped ``module``. See +# :class:`~opacus.grad_sample.gsm_base.AbstractGradSampleModule` for more +# details +# wrap_model: If True (default), wraps module in GradSampleModule. +# If False, uses non-wrapping mode - attaches hooks directly to the provided model +# without wrapping. The original model remains unchanged and can be used normally. +# Cleanup via returned hooks.cleanup() is required when done. Recommended for +# HuggingFace Transformers and models with custom __getattr__ that don't work well with wrapping. + +# Returns: +# Tuple of (hooks_or_module, optimizer, data_loader) or (hooks_or_module, optimizer, criterion, data_loader). + +# Returns a hooks object for gradient sampling and cleanup: +# - If wrap_model=True: Returns GradSampleModule wrapper (use as your model) +# - If wrap_model=False: Returns GradSampleHooks object (use your original model directly, +# use returned hooks only for cleanup) + +# The hooks object provides .cleanup() method. In non-wrapping mode, the original model +# passed to make_private() is unchanged - continue using it normally. + +# Optimizer is a wrapper around the original optimizer that also does +# gradient clipping and noise addition to the gradients +# Criterion is a wrapper around the original criterion that packages the two backward passes for fast gradient clipping. +# Only returned when grad_sample_mode is "ghost". 
+# DataLoader is a brand new DataLoader object, constructed to behave as +# equivalent to the original data loader, possibly with updated +# sampling mechanism. Points to the same dataset object. +# """ +# sample_rate = 1 / len(data_loader) + +# if len(self.accountant) > 0: +# warnings.warn( +# "You're calling make_private_with_epsilon with non-zero privacy budget " +# "already spent. Returned noise_multiplier assumes zero starting point, " +# "so your overall privacy budget will be higher." +# ) + +# return self.make_private( +# module=module, +# optimizer=optimizer, +# data_loader=data_loader, +# criterion=criterion, +# noise_multiplier=get_noise_multiplier( +# target_epsilon=target_epsilon, +# target_delta=target_delta, +# sample_rate=sample_rate, +# epochs=epochs, +# accountant=self.accountant.mechanism(), +# **kwargs, +# ), +# max_grad_norm=max_grad_norm, +# batch_first=batch_first, +# loss_reduction=loss_reduction, +# noise_generator=noise_generator, +# grad_sample_mode=grad_sample_mode, +# poisson_sampling=poisson_sampling, +# clipping=clipping, +# wrap_model=wrap_model, +# metadata_epsilon=metadata_epsilon, +# **kwargs, +# ) + +# def get_epsilon(self, delta): +# """ +# Computes the (epsilon, delta) privacy budget spent so far. + +# Args: +# delta: The target delta. + +# Returns: +# Privacy budget (epsilon) expended so far. +# """ +# return self.accountant.get_epsilon(delta) + +# def save_checkpoint( +# self, +# *, +# path: Union[str, os.PathLike, BinaryIO, IO[bytes]], +# module: Union[nn.Module, GradSampleModule], +# optimizer: Optional[DPOptimizer] = None, +# noise_scheduler: Optional[_NoiseScheduler] = None, +# grad_clip_scheduler: Optional[_GradClipScheduler] = None, +# checkpoint_dict: Optional[Dict[str, Any]] = None, +# module_state_dict_kwargs: Optional[Dict[str, Any]] = None, +# torch_save_kwargs: Optional[Dict[str, Any]] = None, +# ): +# """ +# Saves the state_dict of module, optimizer, and accountant at path. 
+# Args: +# path: Path to save the state dict objects. +# module: nn.Module or GradSampleModule to save; wrapped module's state_dict is saved. +# optimizer: DPOptimizer to save; wrapped optimizer's state_dict is saved. +# noise_scheduler: _NoiseScheduler whose state we should save. +# grad_clip_scheduler: _GradClipScheduler whose state we should save. +# checkpoint_dict: Dict[str, Any]; an already-filled checkpoint dict. +# module_state_dict_kwargs: dict of kwargs to pass to ``module.state_dict()`` +# torch_save_kwargs: dict of kwargs to pass to ``torch.save()`` + +# """ +# checkpoint_dict = checkpoint_dict or {} +# checkpoint_dict["module_state_dict"] = module.state_dict( +# **(module_state_dict_kwargs or {}) +# ) +# checkpoint_dict["privacy_accountant_state_dict"] = self.accountant.state_dict() +# if optimizer is not None: +# checkpoint_dict["optimizer_state_dict"] = optimizer.state_dict() +# if noise_scheduler is not None: +# checkpoint_dict["noise_scheduler_state_dict"] = noise_scheduler.state_dict() +# if grad_clip_scheduler is not None: +# checkpoint_dict["grad_clip_scheduler_state_dict"] = ( +# grad_clip_scheduler.state_dict() +# ) + +# torch.save(checkpoint_dict, path, **(torch_save_kwargs or {})) + +# def load_checkpoint( +# self, +# *, +# path: Union[str, os.PathLike, BinaryIO, IO[bytes]], +# module: Union[nn.Module, GradSampleModule], +# optimizer: Optional[DPOptimizer] = None, +# noise_scheduler: Optional[_NoiseScheduler] = None, +# grad_clip_scheduler: Optional[_GradClipScheduler] = None, +# module_load_dict_kwargs: Optional[Dict[str, Any]] = None, +# torch_load_kwargs: Optional[Dict[str, Any]] = None, +# ) -> Dict: +# checkpoint = torch.load(path, **(torch_load_kwargs or {}), weights_only=False) +# module.load_state_dict( +# checkpoint["module_state_dict"], **(module_load_dict_kwargs or {}) +# ) +# self.accountant.load_state_dict(checkpoint["privacy_accountant_state_dict"]) + +# optimizer_state_dict = checkpoint.pop("optimizer_state_dict", {}) +# if 
optimizer is not None and len(optimizer_state_dict) > 0: +# optimizer.load_state_dict(optimizer_state_dict) +# elif (optimizer is not None) ^ (len(optimizer_state_dict) > 0): +# # warn if only one of them is available +# warnings.warn( +# f"optimizer_state_dict has {len(optimizer_state_dict)} items" +# f" but optimizer is {'' if optimizer else 'not'} provided." +# ) + +# noise_scheduler_state_dict = checkpoint.pop("noise_scheduler_state_dict", {}) +# if noise_scheduler is not None and len(noise_scheduler_state_dict) > 0: +# noise_scheduler.load_state_dict(noise_scheduler_state_dict) + +# grad_clip_scheduler_state_dict = checkpoint.pop( +# "grad_clip_scheduler_state_dict", {} +# ) +# if grad_clip_scheduler is not None and len(grad_clip_scheduler_state_dict) > 0: +# grad_clip_scheduler.load_state_dict(grad_clip_scheduler_state_dict) + +# return checkpoint + +# # print(f"SYSTEM CHECK: Calculating Noisy N for epsilon {metadata_epsilon}") + + +# !/usr/bin/env python3 # Copyright (c) Meta Platforms, Inc. and affiliates. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -18,6 +850,7 @@ from typing import IO, Any, BinaryIO, Dict, List, Optional, Tuple, Union import torch +from torch.utils.data.sampler import WeightedRandomSampler from opacus.accountants import create_accountant from opacus.accountants.utils import get_noise_multiplier from opacus.data_loader import DPDataLoader, switch_generator @@ -43,11 +876,9 @@ class PrivacyEngine: """ Main entry point to the Opacus API - use ``PrivacyEngine`` to enable differential privacy for your model training. - ``PrivacyEngine`` object encapsulates current privacy state (privacy budget + method it's been calculated) and exposes ``make_private`` method to wrap your PyTorch training objects with their private counterparts. 
- Example: >>> dataloader = demo_dataloader >>> model = MyCustomModel() @@ -66,7 +897,6 @@ class PrivacyEngine: def __init__(self, *, accountant: str = "prv", secure_mode: bool = False): """ - Args: accountant: Accounting mechanism. Currently supported: - rdp (:class:`~opacus.accountants.RDPAccountant`) @@ -238,7 +1068,6 @@ def _prepare_criterion( optimizer: DPOptimizer used for training, criterion: Loss function used for training, loss_reduction: "mean" or "sum", indicates if the loss reduction (for aggregating the gradients) - Prepare the DP loss class, which packages the two backward passes for fast gradient clipping. """ return DPLossFastGradientClipping(module, optimizer, criterion, loss_reduction) @@ -252,12 +1081,10 @@ def is_compatible( ) -> bool: """ Check if task components are compatible with DP. - Args: module: module to be checked optimizer: optimizer to be checked data_loader: data_loader to be checked - Returns: ``True`` if compatible, ``False`` otherwise """ @@ -273,12 +1100,10 @@ def validate( """ Validate that task components are compatible with DP. Same as ``is_compatible()``, but raises error instead of returning bool. - Args: module: module to be checked optimizer: optimizer to be checked data_loader: data_loader to be checked - Raises: UnsupportedModuleError If one or more modules found to be incompatible @@ -290,10 +1115,8 @@ def get_compatible_module(cls, module: nn.Module) -> nn.Module: """ Return a privacy engine compatible module. Also validates the module after running registered fixes. - Args: module: module to be modified - Returns: Module with some submodules replaced for their deep copies or close equivalents. @@ -336,19 +1159,15 @@ def make_private( """ Add privacy-related responsibilities to the main PyTorch training objects: model, optimizer, and the data loader. - All of the returned objects act just like their non-private counterparts passed as arguments, but with added DP tasks. 
- - Model is wrapped to also compute per sample gradients. - Optimizer is now responsible for gradient clipping and adding noise to the gradients. - Criterion is a wrapper around the original criterion that packages the two backward passes for fast gradient clipping. - DataLoader is updated to perform Poisson sampling. - Notes: Using any other models, optimizers, or data sources during training will invalidate stated privacy guarantees. - Args: module: PyTorch module to be used for training optimizer: Optimizer to be used for training @@ -383,18 +1202,14 @@ def make_private( details rand_on_empty: Indicates to return a batch containing random numbers when encountering empty batches samples with Poisson sampling rather than tensors with zero-length batch dimensions - Returns: Tuple of (hooks_or_module, optimizer, data_loader) or (hooks_or_module, optimizer, criterion, data_loader). - Returns a hooks object for gradient sampling and cleanup: - If wrap_model=True: Returns GradSampleModule wrapper (use as your model) - If wrap_model=False: Returns GradSampleHooks object (use your original model directly, use returned hooks only for cleanup) - The hooks object provides .cleanup() method. In non-wrapping mode, the original model passed to make_private() is unchanged - continue using it normally. - Optimizer is a wrapper around the original optimizer that also does gradient clipping and noise addition to the gradients Criterion is a wrapper around the original criterion that packages the two backward passes for fast gradient clipping. @@ -507,10 +1322,8 @@ def make_private_with_epsilon( """ Version of :meth:`~opacus.privacy_engine.PrivacyEngine.make_private`, that calculates privacy parameters based on a given privacy budget. 
- For the full documentation see :meth:`~opacus.privacy_engine.PrivacyEngine.make_private` - Args: module: PyTorch module to be used for training optimizer: Optimizer to be used for training @@ -550,18 +1363,14 @@ def make_private_with_epsilon( without wrapping. The original model remains unchanged and can be used normally. Cleanup via returned hooks.cleanup() is required when done. Recommended for HuggingFace Transformers and models with custom __getattr__ that don't work well with wrapping. - Returns: Tuple of (hooks_or_module, optimizer, data_loader) or (hooks_or_module, optimizer, criterion, data_loader). - Returns a hooks object for gradient sampling and cleanup: - If wrap_model=True: Returns GradSampleModule wrapper (use as your model) - If wrap_model=False: Returns GradSampleHooks object (use your original model directly, use returned hooks only for cleanup) - The hooks object provides .cleanup() method. In non-wrapping mode, the original model passed to make_private() is unchanged - continue using it normally. - Optimizer is a wrapper around the original optimizer that also does gradient clipping and noise addition to the gradients Criterion is a wrapper around the original criterion that packages the two backward passes for fast gradient clipping. @@ -570,7 +1379,23 @@ def make_private_with_epsilon( equivalent to the original data loader, possibly with updated sampling mechanism. Points to the same dataset object. """ - sample_rate = 1 / len(data_loader) + # Infer batch_size, handling custom batch_sampler where data_loader.batch_size is None + batch_size = data_loader.batch_size + if batch_size is None: + # Fallback to infer from batch_sampler + batch_size = getattr(data_loader.batch_sampler, 'batch_size', 1) + sample_rate = batch_size / len(data_loader.dataset) + + if isinstance(data_loader.sampler, WeightedRandomSampler): + warnings.warn( + "WeightedRandomSampler detected. Opacus replaces it with " + "UniformWithReplacementSampler for Poisson sampling. 
" + "Privacy accounting uses batch_size/dataset_size as sample_rate. " + "Your original sampler configuration will be overridden. " + "If you need weighted sampling, consider using a custom implementation " + "compatible with differential privacy.", + UserWarning, + ) if len(self.accountant) > 0: warnings.warn( @@ -606,10 +1431,8 @@ def make_private_with_epsilon( def get_epsilon(self, delta): """ Computes the (epsilon, delta) privacy budget spent so far. - Args: delta: The target delta. - Returns: Privacy budget (epsilon) expended so far. """ @@ -638,7 +1461,6 @@ def save_checkpoint( checkpoint_dict: Dict[str, Any]; an already-filled checkpoint dict. module_state_dict_kwargs: dict of kwargs to pass to ``module.state_dict()`` torch_save_kwargs: dict of kwargs to pass to ``torch.save()`` - """ checkpoint_dict = checkpoint_dict or {} checkpoint_dict["module_state_dict"] = module.state_dict( @@ -693,4 +1515,4 @@ def load_checkpoint( if grad_clip_scheduler is not None and len(grad_clip_scheduler_state_dict) > 0: grad_clip_scheduler.load_state_dict(grad_clip_scheduler_state_dict) - return checkpoint + return checkpoint \ No newline at end of file