From d6e420b7428f34910bf8e8987f5ba779bd154aa2 Mon Sep 17 00:00:00 2001 From: ngk123 Date: Tue, 1 Sep 2020 19:42:26 -0400 Subject: [PATCH 1/5] adding file for FGSM adversarial attack --- URSABench/tasks/adver_attack.py | 292 ++++++++++++++++++++++++++++++++ 1 file changed, 292 insertions(+) create mode 100644 URSABench/tasks/adver_attack.py diff --git a/URSABench/tasks/adver_attack.py b/URSABench/tasks/adver_attack.py new file mode 100644 index 0000000..f87595e --- /dev/null +++ b/URSABench/tasks/adver_attack.py @@ -0,0 +1,292 @@ +import numpy as np +import torch +import torch.nn.functional as F +from sklearn.metrics import roc_auc_score as auroc, average_precision_score as prauc + +from .task_base import _Task +from .. import util + +__all__ = ['Adversarial_attack'] + + +class Adversarial_attack(_Task): + supported_metric_list = ['error_rate', 'nll', 'll', 'brier_score', 'ece', 'misclass_model_uncertainty_auroc', + 'misclass_model_uncertainty_aucpr', 'misclass_total_uncertainty_auroc', + 'misclass_total_uncertainty_aucpr', 'misclass_confidence_auroc', + 'misclass_confidence_aucpr'] + + def __init__(self, dataloader, num_classes, device, metric_list, attack='FGSM', l_inf_norm=0.1): + super(Adversarial_attack, self).__init__(dataloader, num_classes, device) + self.attack = attack + self.l_inf_norm = 0.1 + self.data_loader = dataloader['in_distribution_test'] + self.num_classes = num_classes + self.device = device + self.num_samples_collected = 0 + self.ensemble_proba = torch.zeros(len(self.data_loader.dataset), num_classes) + self.expected_data_uncertainty = torch.zeros(len(self.data_loader.dataset)) + self.required_metric_list = self.supported_metric_list if metric_list == 'ALL' else metric_list + assert all(metric in self.supported_metric_list for metric in self.required_metric_list) + self.targets = list() + for batch_idx, (batch_data, batch_labels) in enumerate(self.data_loader): + self.targets.append(batch_labels) + self.targets = torch.cat(self.targets) + + def 
reset(self): + self.num_samples_collected = 0 + self.ensemble_proba = torch.zeros(len(self.data_loader.dataset), self.num_classes) + + def generate_adversarial_examples(self, models, output_performance=True, smoothing=True): + """ + Inout : Models + [Use Input_example, attack_type, variables_defining_attack] + Output : adeversarial examples, performance metrics + Note : For now only FGSM is implemented + """ + + if isinstance(models, list): + if all(issubclass(model.__class__, torch.nn.Module) for model in models): + num_models = len(models) + self.num_samples_collected += num_models + else: + raise NotImplementedError + else: + if issubclass(models.__class__, torch.nn.Module): + self.num_samples_collected += 1 + else: + raise NotImplementedError + + start_idx = 0 + + for batch_idx, (batch_data, batch_labels) in enumerate(self.data_loader): + end_idx = start_idx + len(batch_data) + batch_data = batch_data.to(self.device) + batch_data.requires_grad = True + if isinstance(models, list): + for model_idx, model in enumerate(models): + model.to(self.device) + batch_logits = model(batch_data) + self.ensemble_proba[start_idx: end_idx] += F.log_softmax(batch_logits, dim=-1).exp_().cpu() + self.expected_data_uncertainty[start_idx: end_idx] += \ + util.compute_predictive_entropy(util.central_smoothing( + F.log_softmax(batch_logits, dim=-1).exp_().cpu())) + model.to('cpu') + else: + # Here models indicates a single model. 
+ models.to(self.device) + models.eval() + batch_logits = models(batch_data) + self.ensemble_proba[start_idx: end_idx] += F.log_softmax(batch_logits, dim=-1).exp_().cpu() + self.expected_data_uncertainty[start_idx: end_idx] += \ + util.compute_predictive_entropy(util.central_smoothing( + F.log_softmax(batch_logits, dim=-1).exp_().cpu())) + models.to('cpu') + start_idx = end_idx + + ''' + Keeping reduction method 'none' to get loss contibution of every data-case individually so that gradient + will be calculated properly as per formula of FGSM + ''' + log_likelihood = F.nll_loss(torch.log(self.ensemble_proba/self.num_samples_collected), self.targets, reduction='none') + log_likelihood.backward() + output_adversarial_examples = list() + + for batch_idx, (batch_data, batch_labels) in enumerate(self.data_loader): + batch_data = batch_data + self.l_inf_norm * batch_data.grad.sign + output_adversarial_examples.append(batch_data) + + perf_metrics = None + if output_performance: + perf_metrics = self.get_performance_metrics(output_performance, smoothing) + + return output_adversarial_examples, perf_metrics + + def get_performance_metrics(self, output_performance=False, smoothing = True): + output_dict = {} + for metric in self.required_metric_list: + if metric == 'error_rate': + accuracy = np.mean(np.argmax(self.ensemble_proba.numpy() / self.num_samples_collected, axis=1) == + self.targets.numpy()) + output_dict[metric] = 1 - accuracy + if metric == 'nll' or metric == 'll': + if smoothing: + nll = F.nll_loss( + torch.log(util.central_smoothing(self.ensemble_proba / self.num_samples_collected)), + self.targets) + else: + nll = F.nll_loss(torch.log(self.ensemble_proba / self.num_samples_collected), self.targets) + if metric == 'll': + output_dict[metric] = - nll.item() + else: + output_dict[metric] = nll.item() + if metric == 'brier_score': + output_dict[metric] = _get_brier((self.ensemble_proba / self.num_samples_collected).numpy(), + self.targets.numpy()) + if metric == 
'ece': + output_dict[metric] = _get_ece((self.ensemble_proba / self.num_samples_collected).numpy(), + self.targets.numpy()) + if metric == 'misclass_model_uncertainty_auroc': + output_dict[metric] = _get_misclass_auroc( + util.central_smoothing(self.ensemble_proba / self.num_samples_collected).numpy(), + self.targets.numpy(), criterion='model_uncertainty', topk=1, + expected_data_uncertainty_array=( + self.expected_data_uncertainty / self.num_samples_collected).numpy()) + if metric == 'misclass_model_uncertainty_aucpr': + output_dict[metric] = _get_misclass_aucpr( + util.central_smoothing(self.ensemble_proba / self.num_samples_collected).numpy(), + self.targets.numpy(), criterion='model_uncertainty', topk=1, + expected_data_uncertainty_array=( + self.expected_data_uncertainty / self.num_samples_collected).numpy()) + + if metric == 'misclass_total_uncertainty_auroc': + output_dict[metric] = _get_misclass_auroc( + util.central_smoothing(self.ensemble_proba / self.num_samples_collected).numpy(), + self.targets.numpy(), criterion='entropy', topk=1, + expected_data_uncertainty_array=( + self.expected_data_uncertainty / self.num_samples_collected).numpy()) + + if metric == 'misclass_total_uncertainty_aucpr': + output_dict[metric] = _get_misclass_aucpr( + util.central_smoothing(self.ensemble_proba / self.num_samples_collected).numpy(), + self.targets.numpy(), criterion='entropy', topk=1, + expected_data_uncertainty_array=( + self.expected_data_uncertainty / self.num_samples_collected).numpy()) + + if metric == 'misclass_confidence_auroc': + output_dict[metric] = _get_misclass_auroc( + util.central_smoothing(self.ensemble_proba / self.num_samples_collected).numpy(), + self.targets.numpy(), criterion='confidence', topk=1, + expected_data_uncertainty_array=( + self.expected_data_uncertainty / self.num_samples_collected).numpy()) + + if metric == 'misclass_confidence_aucpr': + output_dict[metric] = _get_misclass_aucpr( + util.central_smoothing(self.ensemble_proba / 
self.num_samples_collected).numpy(), + self.targets.numpy(), criterion='confidence', topk=1, + expected_data_uncertainty_array=( + self.expected_data_uncertainty / self.num_samples_collected).numpy()) + + if output_performance: + if len(self.required_metric_list) != 1: + raise RuntimeError('Multiple metrics in metric list not suitable for output_performance = True') + return float(output_dict[self.required_metric_list[0]]) + else: + return output_dict + + +def _get_ece(preds, targets, n_bins=15): + """ + ECE ported from Asukha et al., 2020. + :param preds: Prediction probabilities in a Numpy array + :param targets: Targets in a numpy array + :param n_bins: Total number of bins to use. + :return: Expected calibration error. + """ + bin_boundaries = np.linspace(0, 1, n_bins + 1) + bin_lowers = bin_boundaries[:-1] + bin_uppers = bin_boundaries[1:] + + confidences, predictions = np.max(preds, 1), np.argmax(preds, 1) + accuracies = (predictions == targets) + + ece = 0.0 + avg_confs_in_bins = [] + for bin_lower, bin_upper in zip(bin_lowers, bin_uppers): + in_bin = np.logical_and(confidences > bin_lower, confidences <= bin_upper) + prop_in_bin = np.mean(in_bin) + if prop_in_bin > 0: + accuracy_in_bin = np.mean(accuracies[in_bin]) + avg_confidence_in_bin = np.mean(confidences[in_bin]) + delta = avg_confidence_in_bin - accuracy_in_bin + avg_confs_in_bins.append(delta) + ece += np.abs(delta) * prop_in_bin + else: + avg_confs_in_bins.append(None) + # For reliability diagrams, also need to return these: + # return ece, bin_lowers, avg_confs_in_bins + return ece + + +def _get_brier(preds, targets): + """ + Function to compute Brier score as ported from Asukha et al., 2020. + :param preds: Prediction probabilities in a numpy array + :param targets: Targets in a numpy array + :return: Brier score. 
+ """ + one_hot_targets = np.zeros(preds.shape) + one_hot_targets[np.arange(len(targets)), targets] = 1.0 + return np.mean(np.sum((preds - one_hot_targets) ** 2, axis=1)) + + +def _misclass_tgt(output, target, topk=(1,)): + """ + Internal method for misclassification detection. + :param output: Prediction probabilities as a torch.Tensor + :param target: Targets as a torch.Tensor + :param topk: Top-k class-probabilities to consider. + :return: + """ + output = torch.Tensor(output) + target = torch.LongTensor(target) + with torch.no_grad(): + maxk = max(topk) + + _, pred = output.topk(maxk, 1, True, True) + pred = pred.t() + correct = pred.eq(target.view(1, -1).expand_as(pred)) + + res = [] + for k in topk: + correct_k = correct[:k].float().sum(0) + res.append(correct_k) + + return res[0].numpy() + + +def _get_misclass_auroc(preds, targets, criterion, topk=1, expected_data_uncertainty_array=None): + """ + Get AUROC for Misclassification detection + :param preds: Prediction probabilities as numpy array + :param targets: Targets as numpy array + :param criterion: Criterion to use for scoring on misclassification detection. + :param topk: Top-kl class probabilities to consider while making predictions. 
+ :param expected_data_uncertainty_array: Expected data uncertainty as numpy array + :return: AUROC on misclassification detection + """ + misclassification_targets = (1 - _misclass_tgt(preds, targets, (topk,))).astype(bool) + + if criterion == 'entropy': + criterion_values = np.sum(-preds * np.log(preds), axis=1) + elif criterion == 'confidence': + criterion_values = -preds.max(axis=1) + elif criterion == 'model_uncertainty': + criterion_values = np.sum(-preds * np.log(preds), axis=1) - expected_data_uncertainty_array + else: + raise NotImplementedError + + return auroc(misclassification_targets, criterion_values) + + +def _get_misclass_aucpr(preds, targets, criterion, topk=1, expected_data_uncertainty_array=None): + """ + Get AUPRC for Misclassification detection + :param preds: Prediction probabilities as numpy array + :param targets: Targets as numpy array + :param criterion: Criterion to use for scoring on misclassification detection. + :param topk: Top-kl class probabilities to consider while making predictions. 
+ :param expected_data_uncertainty_array: Expected data uncertainty as numpy array + :return: AUPRC on misclassification detection + """ + misclassification_targets = (1 - _misclass_tgt(preds, targets, (topk,))).astype(bool) + + if criterion == 'entropy': + criterion_values = np.sum(-preds * np.log(preds), axis=1) + elif criterion == 'confidence': + criterion_values = -preds.max(axis=1) + elif criterion == 'model_uncertainty': + criterion_values = np.sum(-preds * np.log(preds), axis=1) - expected_data_uncertainty_array + else: + raise NotImplementedError + + return prauc(misclassification_targets, criterion_values) From f96e11fb96835e4d39e827b2a89cfaf717a7786b Mon Sep 17 00:00:00 2001 From: ngk123 Date: Tue, 1 Sep 2020 21:02:21 -0400 Subject: [PATCH 2/5] calculating adv images per batch due to requires_grad issue --- URSABench/tasks/adver_attack.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/URSABench/tasks/adver_attack.py b/URSABench/tasks/adver_attack.py index f87595e..ee21f75 100644 --- a/URSABench/tasks/adver_attack.py +++ b/URSABench/tasks/adver_attack.py @@ -56,6 +56,7 @@ def generate_adversarial_examples(self, models, output_performance=True, smoothi else: raise NotImplementedError + output_adversarial_examples = list() start_idx = 0 for batch_idx, (batch_data, batch_labels) in enumerate(self.data_loader): @@ -81,19 +82,19 @@ def generate_adversarial_examples(self, models, output_performance=True, smoothi util.compute_predictive_entropy(util.central_smoothing( F.log_softmax(batch_logits, dim=-1).exp_().cpu())) models.to('cpu') - start_idx = end_idx - - ''' - Keeping reduction method 'none' to get loss contibution of every data-case individually so that gradient - will be calculated properly as per formula of FGSM - ''' - log_likelihood = F.nll_loss(torch.log(self.ensemble_proba/self.num_samples_collected), self.targets, reduction='none') - log_likelihood.backward() - output_adversarial_examples = list() - for 
batch_idx, (batch_data, batch_labels) in enumerate(self.data_loader): - batch_data = batch_data + self.l_inf_norm * batch_data.grad.sign + targets_this_batch = self.targets[start_idx: end_idx] + + # Keeping reduction method 'none' to get loss + # contibution of every data-case individually so that gradient + # will be calculated properly as per formula of FGSM + log_likelihood = F.nll_loss(torch.log(self.ensemble_proba[start_idx: end_idx]/self.num_samples_collected), targets_this_batch, reduction='none') + log_likelihood.backward() + batch_data = batch_data + self.l_inf_norm * batch_data.grad.detach().sign() output_adversarial_examples.append(batch_data) + start_idx = end_idx + + output_adversarial_examples = torch.cat(output_adversarial_examples) perf_metrics = None if output_performance: From 4a16acb2afb925e1db4d2a38875609acce1ca35d Mon Sep 17 00:00:00 2001 From: ngk123 Date: Tue, 1 Sep 2020 22:15:24 -0400 Subject: [PATCH 3/5] cleaned the code and made only fgsm attack specific --- URSABench/tasks/adver_attack.py | 293 -------------------------------- URSABench/tasks/fgsm_attack.py | 83 +++++++++ 2 files changed, 83 insertions(+), 293 deletions(-) delete mode 100644 URSABench/tasks/adver_attack.py create mode 100644 URSABench/tasks/fgsm_attack.py diff --git a/URSABench/tasks/adver_attack.py b/URSABench/tasks/adver_attack.py deleted file mode 100644 index ee21f75..0000000 --- a/URSABench/tasks/adver_attack.py +++ /dev/null @@ -1,293 +0,0 @@ -import numpy as np -import torch -import torch.nn.functional as F -from sklearn.metrics import roc_auc_score as auroc, average_precision_score as prauc - -from .task_base import _Task -from .. 
import util - -__all__ = ['Adversarial_attack'] - - -class Adversarial_attack(_Task): - supported_metric_list = ['error_rate', 'nll', 'll', 'brier_score', 'ece', 'misclass_model_uncertainty_auroc', - 'misclass_model_uncertainty_aucpr', 'misclass_total_uncertainty_auroc', - 'misclass_total_uncertainty_aucpr', 'misclass_confidence_auroc', - 'misclass_confidence_aucpr'] - - def __init__(self, dataloader, num_classes, device, metric_list, attack='FGSM', l_inf_norm=0.1): - super(Adversarial_attack, self).__init__(dataloader, num_classes, device) - self.attack = attack - self.l_inf_norm = 0.1 - self.data_loader = dataloader['in_distribution_test'] - self.num_classes = num_classes - self.device = device - self.num_samples_collected = 0 - self.ensemble_proba = torch.zeros(len(self.data_loader.dataset), num_classes) - self.expected_data_uncertainty = torch.zeros(len(self.data_loader.dataset)) - self.required_metric_list = self.supported_metric_list if metric_list == 'ALL' else metric_list - assert all(metric in self.supported_metric_list for metric in self.required_metric_list) - self.targets = list() - for batch_idx, (batch_data, batch_labels) in enumerate(self.data_loader): - self.targets.append(batch_labels) - self.targets = torch.cat(self.targets) - - def reset(self): - self.num_samples_collected = 0 - self.ensemble_proba = torch.zeros(len(self.data_loader.dataset), self.num_classes) - - def generate_adversarial_examples(self, models, output_performance=True, smoothing=True): - """ - Inout : Models - [Use Input_example, attack_type, variables_defining_attack] - Output : adeversarial examples, performance metrics - Note : For now only FGSM is implemented - """ - - if isinstance(models, list): - if all(issubclass(model.__class__, torch.nn.Module) for model in models): - num_models = len(models) - self.num_samples_collected += num_models - else: - raise NotImplementedError - else: - if issubclass(models.__class__, torch.nn.Module): - self.num_samples_collected += 1 - else: 
- raise NotImplementedError - - output_adversarial_examples = list() - start_idx = 0 - - for batch_idx, (batch_data, batch_labels) in enumerate(self.data_loader): - end_idx = start_idx + len(batch_data) - batch_data = batch_data.to(self.device) - batch_data.requires_grad = True - if isinstance(models, list): - for model_idx, model in enumerate(models): - model.to(self.device) - batch_logits = model(batch_data) - self.ensemble_proba[start_idx: end_idx] += F.log_softmax(batch_logits, dim=-1).exp_().cpu() - self.expected_data_uncertainty[start_idx: end_idx] += \ - util.compute_predictive_entropy(util.central_smoothing( - F.log_softmax(batch_logits, dim=-1).exp_().cpu())) - model.to('cpu') - else: - # Here models indicates a single model. - models.to(self.device) - models.eval() - batch_logits = models(batch_data) - self.ensemble_proba[start_idx: end_idx] += F.log_softmax(batch_logits, dim=-1).exp_().cpu() - self.expected_data_uncertainty[start_idx: end_idx] += \ - util.compute_predictive_entropy(util.central_smoothing( - F.log_softmax(batch_logits, dim=-1).exp_().cpu())) - models.to('cpu') - - targets_this_batch = self.targets[start_idx: end_idx] - - # Keeping reduction method 'none' to get loss - # contibution of every data-case individually so that gradient - # will be calculated properly as per formula of FGSM - log_likelihood = F.nll_loss(torch.log(self.ensemble_proba[start_idx: end_idx]/self.num_samples_collected), targets_this_batch, reduction='none') - log_likelihood.backward() - batch_data = batch_data + self.l_inf_norm * batch_data.grad.detach().sign() - output_adversarial_examples.append(batch_data) - start_idx = end_idx - - output_adversarial_examples = torch.cat(output_adversarial_examples) - - perf_metrics = None - if output_performance: - perf_metrics = self.get_performance_metrics(output_performance, smoothing) - - return output_adversarial_examples, perf_metrics - - def get_performance_metrics(self, output_performance=False, smoothing = True): - 
output_dict = {} - for metric in self.required_metric_list: - if metric == 'error_rate': - accuracy = np.mean(np.argmax(self.ensemble_proba.numpy() / self.num_samples_collected, axis=1) == - self.targets.numpy()) - output_dict[metric] = 1 - accuracy - if metric == 'nll' or metric == 'll': - if smoothing: - nll = F.nll_loss( - torch.log(util.central_smoothing(self.ensemble_proba / self.num_samples_collected)), - self.targets) - else: - nll = F.nll_loss(torch.log(self.ensemble_proba / self.num_samples_collected), self.targets) - if metric == 'll': - output_dict[metric] = - nll.item() - else: - output_dict[metric] = nll.item() - if metric == 'brier_score': - output_dict[metric] = _get_brier((self.ensemble_proba / self.num_samples_collected).numpy(), - self.targets.numpy()) - if metric == 'ece': - output_dict[metric] = _get_ece((self.ensemble_proba / self.num_samples_collected).numpy(), - self.targets.numpy()) - if metric == 'misclass_model_uncertainty_auroc': - output_dict[metric] = _get_misclass_auroc( - util.central_smoothing(self.ensemble_proba / self.num_samples_collected).numpy(), - self.targets.numpy(), criterion='model_uncertainty', topk=1, - expected_data_uncertainty_array=( - self.expected_data_uncertainty / self.num_samples_collected).numpy()) - if metric == 'misclass_model_uncertainty_aucpr': - output_dict[metric] = _get_misclass_aucpr( - util.central_smoothing(self.ensemble_proba / self.num_samples_collected).numpy(), - self.targets.numpy(), criterion='model_uncertainty', topk=1, - expected_data_uncertainty_array=( - self.expected_data_uncertainty / self.num_samples_collected).numpy()) - - if metric == 'misclass_total_uncertainty_auroc': - output_dict[metric] = _get_misclass_auroc( - util.central_smoothing(self.ensemble_proba / self.num_samples_collected).numpy(), - self.targets.numpy(), criterion='entropy', topk=1, - expected_data_uncertainty_array=( - self.expected_data_uncertainty / self.num_samples_collected).numpy()) - - if metric == 
'misclass_total_uncertainty_aucpr': - output_dict[metric] = _get_misclass_aucpr( - util.central_smoothing(self.ensemble_proba / self.num_samples_collected).numpy(), - self.targets.numpy(), criterion='entropy', topk=1, - expected_data_uncertainty_array=( - self.expected_data_uncertainty / self.num_samples_collected).numpy()) - - if metric == 'misclass_confidence_auroc': - output_dict[metric] = _get_misclass_auroc( - util.central_smoothing(self.ensemble_proba / self.num_samples_collected).numpy(), - self.targets.numpy(), criterion='confidence', topk=1, - expected_data_uncertainty_array=( - self.expected_data_uncertainty / self.num_samples_collected).numpy()) - - if metric == 'misclass_confidence_aucpr': - output_dict[metric] = _get_misclass_aucpr( - util.central_smoothing(self.ensemble_proba / self.num_samples_collected).numpy(), - self.targets.numpy(), criterion='confidence', topk=1, - expected_data_uncertainty_array=( - self.expected_data_uncertainty / self.num_samples_collected).numpy()) - - if output_performance: - if len(self.required_metric_list) != 1: - raise RuntimeError('Multiple metrics in metric list not suitable for output_performance = True') - return float(output_dict[self.required_metric_list[0]]) - else: - return output_dict - - -def _get_ece(preds, targets, n_bins=15): - """ - ECE ported from Asukha et al., 2020. - :param preds: Prediction probabilities in a Numpy array - :param targets: Targets in a numpy array - :param n_bins: Total number of bins to use. - :return: Expected calibration error. 
- """ - bin_boundaries = np.linspace(0, 1, n_bins + 1) - bin_lowers = bin_boundaries[:-1] - bin_uppers = bin_boundaries[1:] - - confidences, predictions = np.max(preds, 1), np.argmax(preds, 1) - accuracies = (predictions == targets) - - ece = 0.0 - avg_confs_in_bins = [] - for bin_lower, bin_upper in zip(bin_lowers, bin_uppers): - in_bin = np.logical_and(confidences > bin_lower, confidences <= bin_upper) - prop_in_bin = np.mean(in_bin) - if prop_in_bin > 0: - accuracy_in_bin = np.mean(accuracies[in_bin]) - avg_confidence_in_bin = np.mean(confidences[in_bin]) - delta = avg_confidence_in_bin - accuracy_in_bin - avg_confs_in_bins.append(delta) - ece += np.abs(delta) * prop_in_bin - else: - avg_confs_in_bins.append(None) - # For reliability diagrams, also need to return these: - # return ece, bin_lowers, avg_confs_in_bins - return ece - - -def _get_brier(preds, targets): - """ - Function to compute Brier score as ported from Asukha et al., 2020. - :param preds: Prediction probabilities in a numpy array - :param targets: Targets in a numpy array - :return: Brier score. - """ - one_hot_targets = np.zeros(preds.shape) - one_hot_targets[np.arange(len(targets)), targets] = 1.0 - return np.mean(np.sum((preds - one_hot_targets) ** 2, axis=1)) - - -def _misclass_tgt(output, target, topk=(1,)): - """ - Internal method for misclassification detection. - :param output: Prediction probabilities as a torch.Tensor - :param target: Targets as a torch.Tensor - :param topk: Top-k class-probabilities to consider. 
- :return: - """ - output = torch.Tensor(output) - target = torch.LongTensor(target) - with torch.no_grad(): - maxk = max(topk) - - _, pred = output.topk(maxk, 1, True, True) - pred = pred.t() - correct = pred.eq(target.view(1, -1).expand_as(pred)) - - res = [] - for k in topk: - correct_k = correct[:k].float().sum(0) - res.append(correct_k) - - return res[0].numpy() - - -def _get_misclass_auroc(preds, targets, criterion, topk=1, expected_data_uncertainty_array=None): - """ - Get AUROC for Misclassification detection - :param preds: Prediction probabilities as numpy array - :param targets: Targets as numpy array - :param criterion: Criterion to use for scoring on misclassification detection. - :param topk: Top-kl class probabilities to consider while making predictions. - :param expected_data_uncertainty_array: Expected data uncertainty as numpy array - :return: AUROC on misclassification detection - """ - misclassification_targets = (1 - _misclass_tgt(preds, targets, (topk,))).astype(bool) - - if criterion == 'entropy': - criterion_values = np.sum(-preds * np.log(preds), axis=1) - elif criterion == 'confidence': - criterion_values = -preds.max(axis=1) - elif criterion == 'model_uncertainty': - criterion_values = np.sum(-preds * np.log(preds), axis=1) - expected_data_uncertainty_array - else: - raise NotImplementedError - - return auroc(misclassification_targets, criterion_values) - - -def _get_misclass_aucpr(preds, targets, criterion, topk=1, expected_data_uncertainty_array=None): - """ - Get AUPRC for Misclassification detection - :param preds: Prediction probabilities as numpy array - :param targets: Targets as numpy array - :param criterion: Criterion to use for scoring on misclassification detection. - :param topk: Top-kl class probabilities to consider while making predictions. 
- :param expected_data_uncertainty_array: Expected data uncertainty as numpy array - :return: AUPRC on misclassification detection - """ - misclassification_targets = (1 - _misclass_tgt(preds, targets, (topk,))).astype(bool) - - if criterion == 'entropy': - criterion_values = np.sum(-preds * np.log(preds), axis=1) - elif criterion == 'confidence': - criterion_values = -preds.max(axis=1) - elif criterion == 'model_uncertainty': - criterion_values = np.sum(-preds * np.log(preds), axis=1) - expected_data_uncertainty_array - else: - raise NotImplementedError - - return prauc(misclassification_targets, criterion_values) diff --git a/URSABench/tasks/fgsm_attack.py b/URSABench/tasks/fgsm_attack.py new file mode 100644 index 0000000..d33a73c --- /dev/null +++ b/URSABench/tasks/fgsm_attack.py @@ -0,0 +1,83 @@ +import torch +import torch.nn.functional as F + +from .task_base import _Task + +__all__ = ['FGSM_attack'] + + +class FGSM_attack(_Task): + + def __init__(self, dataloader, num_classes, device, metric_list, l_inf_norm=0.1): + super(FGSM_attack, self).__init__(dataloader, num_classes, device) + self.l_inf_norm = 0.1 + self.data_loader = dataloader['in_distribution_test'] + self.num_classes = num_classes + self.device = device + self.num_samples_collected = 0 + self.ensemble_proba = torch.zeros(len(self.data_loader.dataset), num_classes) + self.expected_data_uncertainty = torch.zeros(len(self.data_loader.dataset)) + self.targets = list() + for batch_idx, (batch_data, batch_labels) in enumerate(self.data_loader): + self.targets.append(batch_labels) + self.targets = torch.cat(self.targets) + + def reset(self): + self.num_samples_collected = 0 + self.ensemble_proba = torch.zeros(len(self.data_loader.dataset), self.num_classes) + + def generate_FGSM_adversarial_examples(self, models, smoothing=True): + """ + Inout : Models + [Use Input_example, attack_type, variables_defining_attack] + Output : adeversarial examples, performance metrics + Note : For now only FGSM is 
implemented + """ + + if isinstance(models, list): + if all(issubclass(model.__class__, torch.nn.Module) for model in models): + num_models = len(models) + self.num_samples_collected += num_models + else: + raise NotImplementedError + else: + if issubclass(models.__class__, torch.nn.Module): + self.num_samples_collected += 1 + else: + raise NotImplementedError + + output_adversarial_examples = list() + start_idx = 0 + + for batch_idx, (batch_data, batch_labels) in enumerate(self.data_loader): + end_idx = start_idx + len(batch_data) + batch_data = batch_data.to(self.device) + batch_data.requires_grad = True + if isinstance(models, list): + for model_idx, model in enumerate(models): + model.to(self.device) + batch_logits = model(batch_data) + self.ensemble_proba[start_idx: end_idx] += F.log_softmax(batch_logits, dim=-1).exp_().cpu() + model.to('cpu') + else: + # Here models indicates a single model. + models.to(self.device) + models.eval() + batch_logits = models(batch_data) + self.ensemble_proba[start_idx: end_idx] += F.log_softmax(batch_logits, dim=-1).exp_().cpu() + models.to('cpu') + + targets_this_batch = self.targets[start_idx: end_idx] + + # Keeping reduction method 'none' to get loss + # contibution of every data-case individually so that gradient + # will be calculated properly as per formula of FGSM + log_likelihood = F.nll_loss(torch.log(self.ensemble_proba[start_idx: end_idx]/self.num_samples_collected), targets_this_batch, reduction='none') + log_likelihood.backward() + batch_data = batch_data + self.l_inf_norm * batch_data.grad.detach().sign() + output_adversarial_examples.append(batch_data) + start_idx = end_idx + + output_adversarial_examples = torch.cat(output_adversarial_examples) + + return output_adversarial_examples From 06019e6fb6c79cc755d04f071758923c22598d0a Mon Sep 17 00:00:00 2001 From: ngk123 Date: Tue, 1 Sep 2020 22:17:10 -0400 Subject: [PATCH 4/5] removing redundant line --- URSABench/tasks/fgsm_attack.py | 1 - 1 file changed, 1 
deletion(-) diff --git a/URSABench/tasks/fgsm_attack.py b/URSABench/tasks/fgsm_attack.py index d33a73c..a4a6f8a 100644 --- a/URSABench/tasks/fgsm_attack.py +++ b/URSABench/tasks/fgsm_attack.py @@ -62,7 +62,6 @@ def generate_FGSM_adversarial_examples(self, models, smoothing=True): else: # Here models indicates a single model. models.to(self.device) - models.eval() batch_logits = models(batch_data) self.ensemble_proba[start_idx: end_idx] += F.log_softmax(batch_logits, dim=-1).exp_().cpu() models.to('cpu') From b1b94f7334ec5085e49ac17b6927ef0d7248b9fa Mon Sep 17 00:00:00 2001 From: ngk123 Date: Thu, 3 Sep 2020 12:45:47 -0400 Subject: [PATCH 5/5] adding PGD attack task and modifying FGSM attack task --- URSABench/tasks/fgsm_attack.py | 22 ++++---- URSABench/tasks/pgd_attack.py | 96 ++++++++++++++++++++++++++++++++++ 2 files changed, 108 insertions(+), 10 deletions(-) create mode 100644 URSABench/tasks/pgd_attack.py diff --git a/URSABench/tasks/fgsm_attack.py b/URSABench/tasks/fgsm_attack.py index a4a6f8a..95c58fa 100644 --- a/URSABench/tasks/fgsm_attack.py +++ b/URSABench/tasks/fgsm_attack.py @@ -16,7 +16,6 @@ def __init__(self, dataloader, num_classes, device, metric_list, l_inf_norm=0.1) self.device = device self.num_samples_collected = 0 self.ensemble_proba = torch.zeros(len(self.data_loader.dataset), num_classes) - self.expected_data_uncertainty = torch.zeros(len(self.data_loader.dataset)) self.targets = list() for batch_idx, (batch_data, batch_labels) in enumerate(self.data_loader): self.targets.append(batch_labels) @@ -26,12 +25,14 @@ def reset(self): self.num_samples_collected = 0 self.ensemble_proba = torch.zeros(len(self.data_loader.dataset), self.num_classes) - def generate_FGSM_adversarial_examples(self, models, smoothing=True): + def generate_FGSM_adversarial_examples(self, models): """ - Inout : Models - [Use Input_example, attack_type, variables_defining_attack] - Output : adeversarial examples, performance metrics - Note : For now only FGSM is 
import torch
import torch.nn.functional as F

from .task_base import _Task

__all__ = ['PGD_attack']


class PGD_attack(_Task):
    """Projected gradient descent (PGD) attack under an L-infinity bound.

    The attack maximises the negative log-likelihood of the true labels
    under the ensemble-averaged predictive distribution, taking signed
    gradient steps of size ``alpha`` and clamping the perturbation to
    ``[-l_inf_norm, l_inf_norm]`` after every step.

    Attributes written by this class:
        num_samples_collected: number of models seen since the last
            ``reset()``.
        ensemble_proba: per-example sum (over the collected models) of the
            class probabilities predicted on the final adversarial inputs;
            divide by ``num_samples_collected`` for the ensemble mean.
        targets: labels gathered by one pass over the data loader.
    """

    def __init__(self, dataloader, num_classes, device, metric_list, alpha=1e-2, attack_iters=40, l_inf_norm=0.1):
        """Store the attack settings and pre-allocate the result buffers.

        Args:
            dataloader: mapping that provides the loader under the key
                ``'in_distribution_test'``.
            num_classes: number of output classes.
            device: device on which the attack is computed.
            metric_list: accepted for signature compatibility; it is not
                used by this class.
            alpha: step size of each signed-gradient update.
            attack_iters: number of PGD iterations per batch.
            l_inf_norm: L-infinity bound on the perturbation.
        """
        super(PGD_attack, self).__init__(dataloader, num_classes, device)
        self.alpha = alpha
        self.attack_iters = attack_iters
        self.l_inf_norm = l_inf_norm
        self.data_loader = dataloader['in_distribution_test']
        self.num_classes = num_classes
        self.device = device
        self.num_samples_collected = 0
        self.ensemble_proba = torch.zeros(len(self.data_loader.dataset), num_classes)
        # Kept for callers that read the labels from the task; the attack
        # itself uses the labels of the batch it is currently perturbing.
        self.targets = torch.cat([batch_labels for _, batch_labels in self.data_loader])

    def reset(self):
        """Forget every model collected so far and zero the probabilities."""
        self.num_samples_collected = 0
        self.ensemble_proba = torch.zeros(len(self.data_loader.dataset), self.num_classes)

    @staticmethod
    def _ensemble_nll(members, inputs, labels):
        """Return the summed NLL of ``labels`` under the ensemble mean."""
        log_probas = torch.stack([F.log_softmax(member(inputs), dim=-1) for member in members])
        # log(mean_m p_m) computed in log space for numerical stability.
        log_num_members = log_probas.new_tensor(float(len(members))).log()
        log_mean_proba = torch.logsumexp(log_probas, dim=0) - log_num_members
        # A scalar loss is required for differentiation. Summing (rather
        # than averaging) keeps each example's gradient unscaled.
        return F.nll_loss(log_mean_proba, labels, reduction='sum')

    def generate_PGD_adversarial_examples(self, models):
        """Craft PGD adversarial examples for every batch of the loader.

        Args:
            models: a single ``torch.nn.Module`` or a list of them, treated
                as an ensemble whose averaged prediction is attacked. The
                train/eval mode of the models is left as the caller set it.

        Returns:
            A tensor with the adversarial examples of all batches
            concatenated in loader order (on ``self.device``).

        Raises:
            NotImplementedError: if ``models`` is not a module or a list
                of modules.
            ValueError: if ``models`` is an empty list.
        """
        members = models if isinstance(models, list) else [models]
        if not all(isinstance(member, torch.nn.Module) for member in members):
            raise NotImplementedError
        if not members:
            raise ValueError('At least one model is required to run the attack.')
        self.num_samples_collected += len(members)

        adversarial_batches = []
        start_idx = 0

        # The models must stay on the device until the gradients have been
        # computed, so they are moved once here and restored to the CPU at
        # the end (also on failure).
        for member in members:
            member.to(self.device)
        try:
            for batch_data, batch_labels in self.data_loader:
                batch_data = batch_data.to(self.device)
                batch_labels = batch_labels.to(self.device)
                end_idx = start_idx + len(batch_data)

                # Created after the transfer so that it lives on the same
                # device as the data it perturbs.
                delta = torch.zeros_like(batch_data, requires_grad=True)

                for _ in range(self.attack_iters):
                    loss = self._ensemble_nll(members, batch_data + delta, batch_labels)
                    # autograd.grad only differentiates w.r.t. delta and
                    # leaves the parameters' .grad buffers untouched.
                    grad, = torch.autograd.grad(loss, delta)
                    with torch.no_grad():
                        # Normalised steepest ascent followed by projection
                        # onto the L-infinity ball.
                        delta.add_(self.alpha * grad.sign()).clamp_(-self.l_inf_norm, self.l_inf_norm)

                adversarial_batch = (batch_data + delta).detach()

                # Record the predictions on the final adversarial inputs
                # once, instead of once per attack iteration.
                with torch.no_grad():
                    for member in members:
                        batch_proba = F.softmax(member(adversarial_batch), dim=-1)
                        self.ensemble_proba[start_idx:end_idx] += batch_proba.cpu()

                adversarial_batches.append(adversarial_batch)
                start_idx = end_idx
        finally:
            for member in members:
                member.to('cpu')

        return torch.cat(adversarial_batches)